Maemo 5: Allow deleting of non-downloaded episodes
[gpodder.git] / src / gpodder / model.py
blob edc72a28f525681ba581c7e8132b757ecab1e2c5
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

#
#  gpodder.model - Core model classes for gPodder (2009-08-13)
#  Based on libpodcasts.py (thp, 2005-10-29)
#

import gpodder
from gpodder import util
from gpodder import feedcore
from gpodder import youtube
from gpodder import corestats
from gpodder import gstreamer

from gpodder.liblogger import log

import os
import re
import glob
import shutil
import time
import datetime
import rfc822
import hashlib
import feedparser
import xml.sax.saxutils

_ = gpodder.gettext


class CustomFeed(feedcore.ExceptionWithData): pass

class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder and provides a
    convenience method (fetch_channel) for use by PodcastChannel objects.
    """
    custom_handlers = []

    def __init__(self):
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        etag = channel.etag
        modified = feedparser._parse_date(channel.last_modified)
        # If we have a username or password, rebuild the url with them included
        # Note: using a HTTPBasicAuthHandler would be a pain because we need to
        # know the realm. It can be done, but I think this method works, too
        url = channel.authenticate_url(channel.url)
        for handler in self.custom_handlers:
            custom_feed = handler.handle_url(url)
            if custom_feed is not None:
                raise CustomFeed(custom_feed)
        self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        return youtube.get_real_channel_url(url)

    @classmethod
    def register(cls, handler):
        cls.custom_handlers.append(handler)

#    def _get_handlers(self):
#        # Add a ProxyHandler for fetching data via a proxy server
#        proxies = {'http': 'http://proxy.example.org:8080'}
#        return [urllib2.ProxyHandler(proxies)]

# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register

class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        o = cls(*args)
        o.update_from_dict(d)
        return o

    def update_from_dict(self, d):
        """
        Updates the attributes of this object with values from the
        dictionary "d" by using the keys found in "d".
        """
        for k in d:
            if hasattr(self, k):
                setattr(self, k, d[k])
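
    # Usage sketch (comment added for illustration, not part of the original
    # code; the row dictionary is hypothetical). The database layer rebuilds
    # model objects this way; note that only keys matching existing attributes
    # are copied, unknown keys are silently ignored:
    #
    #   row = {'title': 'Example episode', 'nonexistent_column': 42}
    #   episode = PodcastEpisode.create_from_dict(row, channel)
    #   episode.title                            # -> 'Example episode'
    #   hasattr(episode, 'nonexistent_column')   # -> False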


class PodcastChannel(PodcastModelObject):
    """holds data for a complete channel"""
    MAX_FOLDERNAME_LENGTH = 150
    SECONDS_PER_WEEK = 7*24*60*60

    feed_fetcher = gPodderFetcher()

    @classmethod
    def build_factory(cls, download_dir):
        def factory(dict, db):
            return cls.create_from_dict(dict, db, download_dir)
        return factory

    @classmethod
    def load_from_db(cls, db, download_dir):
        return db.load_channels(factory=cls.build_factory(download_dir))

    @classmethod
    def load(cls, db, url, create=True, authentication_tokens=None,\
            max_episodes=0, download_dir=None, allow_empty_feeds=False):
        if isinstance(url, unicode):
            url = url.encode('utf-8')

        tmp = db.load_channels(factory=cls.build_factory(download_dir), url=url)
        if len(tmp):
            return tmp[0]
        elif create:
            tmp = PodcastChannel(db, download_dir)
            tmp.url = url
            if authentication_tokens is not None:
                tmp.username = authentication_tokens[0]
                tmp.password = authentication_tokens[1]

            tmp.update(max_episodes)
            tmp.save()
            db.force_last_new(tmp)
            # Subscribing to empty feeds should yield an error (except if
            # the user specifically allows empty feeds in the config UI)
            if sum(tmp.get_statistics()) == 0 and not allow_empty_feeds:
                tmp.delete()
                raise Exception(_('No downloadable episodes in feed'))
            return tmp
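
    # Usage sketch (comment added for illustration; "db" and "config" are
    # hypothetical). load() returns the existing channel for a URL or, with
    # create=True, subscribes to it - updating, saving and validating the new
    # channel as shown above:
    #
    #   channel = PodcastChannel.load(db, 'http://example.com/feed.xml',
    #           create=True, max_episodes=config.max_episodes_per_feed,
    #           download_dir=config.download_dir)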

    def episode_factory(self, d, db__parameter_is_unused=None):
        """
        This function takes a dictionary containing key-value pairs for
        episodes and returns a new PodcastEpisode object that is connected
        to this PodcastChannel object.

        Returns: A new PodcastEpisode object
        """
        return PodcastEpisode.create_from_dict(d, self)

    def _consume_custom_feed(self, custom_feed, max_episodes=0):
        self.title = custom_feed.get_title()
        self.link = custom_feed.get_link()
        self.description = custom_feed.get_description()
        self.image = custom_feed.get_image()
        self.pubDate = time.time()
        self.save()

        guids = [episode.guid for episode in self.get_all_episodes()]

        # Insert newly-found episodes into the database
        custom_feed.get_new_episodes(self, guids)

        self.save()

        self.db.purge(max_episodes, self.id)

    def _consume_updated_feed(self, feed, max_episodes=0):
        self.parse_error = feed.get('bozo_exception', None)

        self.title = feed.feed.get('title', self.url)
        self.link = feed.feed.get('link', self.link)
        self.description = feed.feed.get('subtitle', self.description)
        # Start YouTube-specific title FIX
        YOUTUBE_PREFIX = 'Uploads by '
        if self.title.startswith(YOUTUBE_PREFIX):
            self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
        # End YouTube-specific title FIX

        try:
            self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None)+(0,))
        except:
            self.pubDate = time.time()

        if hasattr(feed.feed, 'image'):
            for attribute in ('href', 'url'):
                new_value = getattr(feed.feed.image, attribute, None)
                if new_value is not None:
                    log('Found cover art in %s: %s', attribute, new_value)
                    self.image = new_value

        if hasattr(feed.feed, 'icon'):
            self.image = feed.feed.icon

        self.save()

        # Load all episodes to update them properly.
        existing = self.get_all_episodes()

        # We can limit the maximum number of entries that gPodder will parse
        if max_episodes > 0 and len(feed.entries) > max_episodes:
            entries = feed.entries[:max_episodes]
        else:
            entries = feed.entries

        # Title + PubDate hashes for existing episodes
        existing_dupes = dict((e.duplicate_id(), e) for e in existing)

        # GUID-based existing episode list
        existing_guids = dict((e.guid, e) for e in existing)

        # Get most recent pubDate of all episodes
        last_pubdate = self.db.get_last_pubdate(self) or 0

        # Search all entries for new episodes
        for entry in entries:
            try:
                episode = PodcastEpisode.from_feedparser_entry(entry, self)
                if episode is not None and not episode.title:
                    episode.title, ext = os.path.splitext(os.path.basename(episode.url))
            except Exception, e:
                log('Cannot instantiate episode: %s. Skipping.', e, sender=self, traceback=True)
                continue

            if episode is None:
                continue

            # Detect (and update) existing episode based on GUIDs
            existing_episode = existing_guids.get(episode.guid, None)
            if existing_episode:
                existing_episode.update_from(episode)
                existing_episode.save()
                continue

            # Detect (and update) existing episode based on duplicate ID
            existing_episode = existing_dupes.get(episode.duplicate_id(), None)
            if existing_episode:
                if existing_episode.is_duplicate(episode):
                    existing_episode.update_from(episode)
                    existing_episode.save()
                    continue

            # Workaround for bug 340: If the episode has been
            # published earlier than one week before the most
            # recent existing episode, do not mark it as new.
            if episode.pubDate < last_pubdate - self.SECONDS_PER_WEEK:
                log('Episode with old date: %s', episode.title, sender=self)
                episode.is_played = True

            episode.save()

        # Remove "unreachable" episodes - episodes that have not been
        # downloaded and that the feed does not list as downloadable anymore
        if self.id is not None:
            seen_guids = set(e.guid for e in feed.entries if hasattr(e, 'guid'))
            episodes_to_purge = (e for e in existing if \
                    e.state != gpodder.STATE_DOWNLOADED and \
                    e.guid not in seen_guids and e.guid is not None)
            for episode in episodes_to_purge:
                log('Episode removed from feed: %s (%s)', episode.title, \
                        episode.guid, sender=self)
                self.db.delete_episode_by_guid(episode.guid, self.id)

        # This *might* cause episodes to be skipped if there were more than
        # max_episodes_per_feed items added to the feed between updates.
        # The benefit is that it prevents old episodes from appearing as new
        # in certain situations (see bug #340).
        self.db.purge(max_episodes, self.id)

    def update_channel_lock(self):
        self.db.update_channel_lock(self)

    def _update_etag_modified(self, feed):
        self.updated_timestamp = time.time()
        self.calculate_publish_behaviour()
        self.etag = feed.headers.get('etag', self.etag)
        self.last_modified = feed.headers.get('last-modified', self.last_modified)

    def query_automatic_update(self):
        """Query if this channel should be updated automatically

        Returns True if the update should happen in automatic
        mode or False if this channel should be skipped (timeout
        not yet reached or release not expected right now).
        """
        updated = self.updated_timestamp
        expected = self.release_expected

        now = time.time()
        one_day_ago = now - 60*60*24
        lastcheck = now - 60*10

        return updated < one_day_ago or \
                (expected < now and updated < lastcheck)
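
    # Worked example (comment added for illustration, numbers hypothetical):
    # a channel is updated automatically if its last update is more than one
    # day old, OR if a release is already expected and the last update is
    # more than ten minutes old. With now=100000, updated=99500 (about eight
    # minutes ago) and release_expected=99900, both conditions fail and the
    # channel is skipped until "updated" falls below now - 600.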

    def update(self, max_episodes=0):
        try:
            self.feed_fetcher.fetch_channel(self)
        except CustomFeed, updated:
            custom_feed = updated.data
            self._consume_custom_feed(custom_feed, max_episodes)
            self.save()
        except feedcore.UpdatedFeed, updated:
            feed = updated.data
            self._consume_updated_feed(feed, max_episodes)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NewLocation, updated:
            feed = updated.data
            self.url = feed.href
            self._consume_updated_feed(feed, max_episodes)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NotModified, updated:
            feed = updated.data
            self._update_etag_modified(feed)
            self.save()
        except Exception, e:
            # "Not really" errors
            #feedcore.AuthenticationRequired
            # Temporary errors
            #feedcore.Offline
            #feedcore.BadRequest
            #feedcore.InternalServerError
            #feedcore.WifiLogin
            # Permanent errors
            #feedcore.Unsubscribe
            #feedcore.NotFound
            #feedcore.InvalidFeed
            #feedcore.UnknownStatusCode
            raise

        self.db.commit()

    def delete(self):
        self.db.delete_channel(self)

    def save(self):
        self.db.save_channel(self)

    def get_statistics(self):
        if self.id is None:
            return (0, 0, 0, 0, 0)
        else:
            return self.db.get_channel_count(int(self.id))

    def authenticate_url(self, url):
        return util.url_add_authentication(url, self.username, self.password)

    def __init__(self, db, download_dir):
        self.db = db
        self.download_dir = download_dir
        self.id = None
        self.url = None
        self.title = ''
        self.link = ''
        self.description = ''
        self.image = None
        self.pubDate = 0
        self.parse_error = None
        self.foldername = None
        self.auto_foldername = 1 # automatically generated foldername

        # should this channel be synced to devices? (ex: iPod)
        self.sync_to_devices = True
        # to which playlist should be synced
        self.device_playlist_name = 'gPodder'
        # if set, this overrides the channel-provided title
        self.override_title = ''
        self.username = ''
        self.password = ''

        self.last_modified = None
        self.etag = None

        self.save_dir_size = 0
        self.__save_dir_size_set = False

        self.channel_is_locked = False

        self.release_expected = time.time()
        self.release_deviation = 0
        self.updated_timestamp = 0

    def calculate_publish_behaviour(self):
        episodes = self.db.load_episodes(self, factory=self.episode_factory, limit=30)
        if len(episodes) < 3:
            return

        deltas = []
        latest = max(e.pubDate for e in episodes)
        for index in range(len(episodes)-1):
            if episodes[index].pubDate != 0 and episodes[index+1].pubDate != 0:
                deltas.append(episodes[index].pubDate - episodes[index+1].pubDate)

        if len(deltas) > 1:
            stats = corestats.Stats(deltas)
            self.release_expected = min([latest+stats.stdev(), latest+(stats.min()+stats.avg())*.5])
            self.release_deviation = stats.stdev()
        else:
            self.release_expected = latest
            self.release_deviation = 0
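
    # Note (comment added for clarity): release_expected is the earlier of
    # "latest + stdev(deltas)" and "latest + (min(deltas) + avg(deltas)) / 2",
    # where "latest" is the newest episode's pubDate and "deltas" are the gaps
    # between consecutive recent episodes. query_automatic_update() above uses
    # this value to decide whether a release is already expected.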

    def request_save_dir_size(self):
        if not self.__save_dir_size_set:
            self.update_save_dir_size()
            self.__save_dir_size_set = True

    def update_save_dir_size(self):
        self.save_dir_size = util.calculate_size(self.save_dir)

    def get_title( self):
        if self.override_title:
            return self.override_title
        elif not self.__title.strip():
            return self.url
        else:
            return self.__title

    def set_title( self, value):
        self.__title = value.strip()

    title = property(fget=get_title,
                     fset=set_title)

    def set_custom_title( self, custom_title):
        custom_title = custom_title.strip()

        # if the custom title is the same as we have
        if custom_title == self.override_title:
            return

        # if custom title is the same as channel title and we didn't have a custom title
        if custom_title == self.__title and self.override_title == '':
            return

        # make sure self.foldername is initialized
        self.get_save_dir()

        # rename folder if custom_title looks sane
        new_folder_name = self.find_unique_folder_name(custom_title)
        if len(new_folder_name) > 0 and new_folder_name != self.foldername:
            log('Changing foldername based on custom title: %s', custom_title, sender=self)
            new_folder = os.path.join(self.download_dir, new_folder_name)
            old_folder = os.path.join(self.download_dir, self.foldername)
            if os.path.exists(old_folder):
                if not os.path.exists(new_folder):
                    # Old folder exists, new folder does not -> simply rename
                    log('Renaming %s => %s', old_folder, new_folder, sender=self)
                    os.rename(old_folder, new_folder)
                else:
                    # Both folders exist -> move files and delete old folder
                    log('Moving files from %s to %s', old_folder, new_folder, sender=self)
                    for file in glob.glob(os.path.join(old_folder, '*')):
                        shutil.move(file, new_folder)
                    log('Removing %s', old_folder, sender=self)
                    shutil.rmtree(old_folder, ignore_errors=True)
            self.foldername = new_folder_name
            self.save()

        if custom_title != self.__title:
            self.override_title = custom_title
        else:
            self.override_title = ''

    def get_downloaded_episodes(self):
        return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)

    def get_new_episodes(self, downloading=lambda e: False):
        """
        Get a list of new episodes. You can optionally specify
        "downloading" as a callback that takes an episode as
        a parameter and returns True if the episode is currently
        being downloaded or False if not.

        By default, "downloading" is implemented so that it
        reports all episodes as not downloading.
        """
        return [episode for episode in self.db.load_episodes(self, \
                factory=self.episode_factory, state=gpodder.STATE_NORMAL) if \
                episode.check_is_new(downloading=downloading)]
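
    # Usage sketch (comment added for illustration; "download_queue" is a
    # hypothetical container of episodes currently being downloaded). The UI
    # typically passes a callback so queued episodes are not listed as new:
    #
    #   new_episodes = channel.get_new_episodes(
    #           downloading=lambda episode: episode in download_queue)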

    def get_playlist_filename(self):
        # If the save_dir doesn't end with a slash (which it really should
        # not, if the implementation is correct), we can just append .m3u :)
        assert self.save_dir[-1] != '/'
        return self.save_dir+'.m3u'

    def update_m3u_playlist(self):
        m3u_filename = self.get_playlist_filename()

        downloaded_episodes = self.get_downloaded_episodes()
        if not downloaded_episodes:
            log('No episodes - removing %s', m3u_filename, sender=self)
            util.delete_file(m3u_filename)
            return

        log('Writing playlist to %s', m3u_filename, sender=self)
        f = open(m3u_filename, 'w')
        f.write('#EXTM3U\n')

        for episode in PodcastEpisode.sort_by_pubdate(downloaded_episodes):
            if episode.was_downloaded(and_exists=True):
                filename = episode.local_filename(create=False)
                assert filename is not None

                if os.path.dirname(filename).startswith(os.path.dirname(m3u_filename)):
                    filename = filename[len(os.path.dirname(m3u_filename)+os.sep):]
                f.write('#EXTINF:0,'+self.title+' - '+episode.title+' ('+episode.cute_pubdate()+')\n')
                f.write(filename+'\n')

        f.close()

    def get_all_episodes(self):
        return self.db.load_episodes(self, factory=self.episode_factory)

    def find_unique_folder_name(self, foldername):
        # Remove trailing dots to avoid errors on Windows (bug 600)
        foldername = foldername.strip().rstrip('.')

        current_try = util.sanitize_filename(foldername, \
                self.MAX_FOLDERNAME_LENGTH)
        next_try_id = 2

        while True:
            if self.db.channel_foldername_exists(current_try):
                current_try = '%s (%d)' % (foldername, next_try_id)
                next_try_id += 1
            else:
                return current_try

    def get_save_dir(self):
        urldigest = hashlib.md5(self.url).hexdigest()
        sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
        if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
            # we must change the folder name, because it has not been set manually
            fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)

            # if this is an empty string, try the basename
            if len(fn_template) == 0:
                log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)

            # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
            if len(fn_template) == 0:
                log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = urldigest # no need for sanitize_filename here

            # Find a unique folder name for this podcast
            wanted_foldername = self.find_unique_folder_name(fn_template)

            # if the foldername has not been set, check if the (old) md5 filename exists
            if self.foldername is None and os.path.exists(os.path.join(self.download_dir, urldigest)):
                log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
                self.foldername = urldigest

            # we have a valid, new folder name in "wanted_foldername" -> use that!
            if self.foldername is not None and wanted_foldername != self.foldername:
                # there might be an old download folder crawling around - move it!
                new_folder_name = os.path.join(self.download_dir, wanted_foldername)
                old_folder_name = os.path.join(self.download_dir, self.foldername)
                if os.path.exists(old_folder_name):
                    if not os.path.exists(new_folder_name):
                        # Old folder exists, new folder does not -> simply rename
                        log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
                        os.rename(old_folder_name, new_folder_name)
                    else:
                        # Both folders exist -> move files and delete old folder
                        log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
                        for file in glob.glob(os.path.join(old_folder_name, '*')):
                            shutil.move(file, new_folder_name)
                        log('Removing %s', old_folder_name, sender=self)
                        shutil.rmtree(old_folder_name, ignore_errors=True)
            log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
            self.foldername = wanted_foldername
            self.save()

        save_dir = os.path.join(self.download_dir, self.foldername)

        # Create save_dir if it does not yet exist
        if not util.make_directory( save_dir):
            log( 'Could not create save_dir: %s', save_dir, sender = self)

        return save_dir

    save_dir = property(fget=get_save_dir)

    def remove_downloaded( self):
        shutil.rmtree( self.save_dir, True)

    @property
    def cover_file(self):
        new_name = os.path.join(self.save_dir, 'folder.jpg')
        if not os.path.exists(new_name):
            old_names = ('cover', '.cover')
            for old_name in old_names:
                filename = os.path.join(self.save_dir, old_name)
                if os.path.exists(filename):
                    shutil.move(filename, new_name)
                    return new_name

        return new_name
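
    # Note (comment added for clarity): delete_episode() does not require a
    # downloaded file - when no local filename can be determined, the file
    # deletion is skipped and only the episode state is set to STATE_DELETED,
    # which is what allows deleting episodes that were never downloaded.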

    def delete_episode(self, episode):
        filename = episode.local_filename(create=False, check_only=True)
        if filename is not None:
            util.delete_file(filename)

        episode.set_state(gpodder.STATE_DELETED)


class PodcastEpisode(PodcastModelObject):
    """holds data for one object in a channel"""
    MAX_FILENAME_LENGTH = 200

    def _get_played(self):
        return self.is_played

    def _set_played(self, played):
        self.is_played = played

    # Alias "is_played" to "played" for DB column mapping
    played = property(fget=_get_played, fset=_set_played)

    def _get_locked(self):
        return self.is_locked

    def _set_locked(self, locked):
        self.is_locked = locked

    # Alias "is_locked" to "locked" for DB column mapping
    locked = property(fget=_get_locked, fset=_set_locked)

    def _get_channel_id(self):
        return self.channel.id

    def _set_channel_id(self, channel_id):
        assert self.channel.id == channel_id

    # Accessor for the "channel_id" DB column
    channel_id = property(fget=_get_channel_id, fset=_set_channel_id)

    @staticmethod
    def sort_by_pubdate(episodes, reverse=False):
        """Sort a list of PodcastEpisode objects chronologically

        Returns an iterable, sorted sequence of the episodes
        """
        key_pubdate = lambda e: e.pubDate
        return sorted(episodes, key=key_pubdate, reverse=reverse)

    def reload_from_db(self):
        """
        Re-reads all episode details for this object from the
        database and updates this object accordingly. Can be
        used to refresh existing objects when the database has
        been updated (e.g. the filename has been set after a
        download where it was not set before the download)
        """
        d = self.db.load_episode(self.id)
        self.update_from_dict(d or {})
        return self

    def has_website_link(self):
        return bool(self.link) and (self.link != self.url or \
                youtube.is_video_link(self.link))

    @staticmethod
    def from_feedparser_entry(entry, channel):
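        # Overview (comment added for clarity, derived from the code below):
        # the episode URL is taken, in order of preference, from
        # 1) RSS/Atom enclosures, 2) Media RSS media:content entries,
        # 3) any entry link that is a YouTube video or resolves to an
        # audio/video file, and 4) as a last resort, plain .mp3 links scraped
        # from the entry's HTML content. If none of these yields a usable
        # URL, None is returned and the entry is skipped by the caller.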
        episode = PodcastEpisode(channel)

        episode.title = entry.get('title', '')
        episode.link = entry.get('link', '')
        episode.description = entry.get('summary', '')

        try:
            # Parse iTunes-specific podcast duration metadata
            total_time = util.parse_time(entry.get('itunes_duration', ''))
            episode.total_time = total_time
        except:
            pass

        # Fallback to subtitle if summary is not available
        if not episode.description:
            episode.description = entry.get('subtitle', '')

        episode.guid = entry.get('id', '')
        if entry.get('updated_parsed', None):
            episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

        enclosures = entry.get('enclosures', ())
        audio_available = any(e.get('type', '').startswith('audio/') \
                for e in enclosures)
        video_available = any(e.get('type', '').startswith('video/') \
                for e in enclosures)

        # Enclosures
        for e in enclosures:
            episode.mimetype = e.get('type', 'application/octet-stream')
            if episode.mimetype == '':
                # See Maemo bug 10036
                log('Fixing empty mimetype in ugly feed', sender=episode)
                episode.mimetype = 'application/octet-stream'

            if '/' not in episode.mimetype:
                continue

            # Skip images in feeds if audio or video is available (bug 979)
            if episode.mimetype.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(e.length) or -1
            except:
                episode.length = -1

            return episode

        # Media RSS content
        for m in entry.get('media_content', ()):
            episode.mimetype = m.get('type', 'application/octet-stream')
            if '/' not in episode.mimetype:
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(m.fileSize) or -1
            except:
                episode.length = -1

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if youtube.is_video_link(episode.url):
                return episode

            # Check if we can resolve this link to an audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to an audio or video file - use it!
            if file_type is not None:
                return episode

        # Scan MP3 links in description text
        mp3s = re.compile(r'http://[^"]*\.mp3')
        for content in entry.get('content', ()):
            html = content.value
            for match in mp3s.finditer(html):
                episode.url = match.group(0)
                return episode

        return None

    def __init__(self, channel):
        self.db = channel.db
        # Used by Storage for faster saving
        self.id = None
        self.url = ''
        self.title = ''
        self.length = 0
        self.mimetype = 'application/octet-stream'
        self.guid = ''
        self.description = ''
        self.link = ''
        self.channel = channel
        self.pubDate = 0
        self.filename = None
        self.auto_filename = 1 # automatically generated filename

        self.state = gpodder.STATE_NORMAL
        self.is_played = False

        # Initialize the "is_locked" property
        self._is_locked = False
        self.is_locked = channel.channel_is_locked

        # Time attributes
        self.total_time = 0
        self.current_position = 0
        self.current_position_updated = 0

    def get_is_locked(self):
        return self._is_locked

    def set_is_locked(self, is_locked):
        self._is_locked = bool(is_locked)

    is_locked = property(fget=get_is_locked, fset=set_is_locked)

    def save(self):
        if self.state != gpodder.STATE_DOWNLOADED and self.file_exists():
            self.state = gpodder.STATE_DOWNLOADED
        self.db.save_episode(self)

    def on_downloaded(self, filename):
        self.state = gpodder.STATE_DOWNLOADED
        self.is_played = False
        self.length = os.path.getsize(filename)

        if not self.total_time:
            try:
                length = gstreamer.get_track_length(filename)
                if length is not None:
                    length = int(length/1000)
                    log('Detected media length: %d seconds', length, \
                            sender=self)
                    self.total_time = length
                    self.db.save_episode(self)
                    self.db.commit()
                    return
            except Exception, e:
                log('Error while detecting media length: %s', str(e), \
                        sender=self)

        self.db.save_downloaded_episode(self)
        self.db.commit()

    def set_state(self, state):
        self.state = state
        self.db.update_episode_state(self)

    def mark(self, state=None, is_played=None, is_locked=None):
        if state is not None:
            self.state = state
        if is_played is not None:
            self.is_played = is_played
        if is_locked is not None:
            self.is_locked = is_locked
        self.db.update_episode_state(self)

    @property
    def title_markup(self):
        return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
                xml.sax.saxutils.escape(self.channel.title))

    @property
    def maemo_markup(self):
        if self.length > 0:
            length_str = '%s; ' % self.filesize_prop
        else:
            length_str = ''
        return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(self.title), \
                xml.sax.saxutils.escape(length_str), \
                xml.sax.saxutils.escape(self.pubdate_prop), \
                xml.sax.saxutils.escape(self.channel.title))

    @property
    def maemo_remove_markup(self):
        if self.is_played:
            played_string = _('played')
        else:
            played_string = _('unplayed')
        downloaded_string = self.get_age_string()
        if not downloaded_string:
            downloaded_string = _('today')
        return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(self.title), \
                xml.sax.saxutils.escape(self.filesize_prop), \
                xml.sax.saxutils.escape(played_string), \
                xml.sax.saxutils.escape(downloaded_string), \
                xml.sax.saxutils.escape(self.channel.title))

    def age_in_days(self):
        return util.file_age_in_days(self.local_filename(create=False, \
                check_only=True))

    def get_age_string(self):
        return util.file_age_to_string(self.age_in_days())

    age_prop = property(fget=get_age_string)

    def one_line_description( self):
        lines = util.remove_html_tags(self.description).strip().splitlines()
        if not lines or lines[0] == '':
            return _('No description available')
        else:
            return ' '.join(lines)

    def delete_from_disk(self):
        try:
            self.channel.delete_episode(self)
        except:
            log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)

    def find_unique_file_name(self, url, filename, extension):
        current_try = util.sanitize_filename(filename, self.MAX_FILENAME_LENGTH)+extension
        next_try_id = 2
        lookup_url = None

        if self.filename == current_try and current_try is not None:
            # We already have this filename - good!
            return current_try

        while self.db.episode_filename_exists(current_try):
            current_try = '%s (%d)%s' % (filename, next_try_id, extension)
            next_try_id += 1

        return current_try

    def local_filename(self, create, force_update=False, check_only=False,
            template=None):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if it would be a good idea. Use this if
        you only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.
        """
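        # Usage sketch (comment added for illustration; "episode" and "player"
        # are hypothetical):
        #
        #   # Only check whether a file is already present on disk:
        #   path = episode.local_filename(create=False, check_only=True)
        #   if path is not None and os.path.exists(path):
        #       player.play(path)
        #
        #   # About to download: generate (and persist) a filename:
        #   target = episode.local_filename(create=True)
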
        ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

        # For compatibility with already-downloaded episodes, we
        # have to know md5 filenames if they are downloaded already
        urldigest = hashlib.md5(self.url).hexdigest()

        if not create and self.filename is None:
            urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
            if os.path.exists(urldigest_filename):
                # The file exists, so set it up in our database
                log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
                self.filename = urldigest+ext
                self.auto_filename = 1
                self.save()
                return urldigest_filename
            return None

        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if check_only:
            if self.filename is None:
                return None
            else:
                return os.path.join(self.channel.save_dir, self.filename)

        if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, extension_UNUSED = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                log('Looks like a redirection to me: %s', self.url, sender=self)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                log('Redirection resolved to: %s', url, sender=self)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use the video title for YouTube downloads
            for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
                if self.url.startswith(yt_url):
                    fn_template = util.sanitize_filename(os.path.basename(self.title), self.MAX_FILENAME_LENGTH)

            # If the basename is empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0 or fn_template.startswith('redirect.'):
                log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
                fn_template = urldigest

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

            # We populate the filename field the first time - does the old file still exist?
            if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
                log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
                self.filename = urldigest+ext

            # The old file exists, but we have decided to want a different filename
            if self.filename is not None and wanted_filename != self.filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    log('Choosing new filename: %s', new_file_name, sender=self)
                else:
                    log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
                log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
            elif self.filename is None:
                log('Setting filename to "%s".', wanted_filename, sender=self)
            else:
                log('Should update filename. Stays the same (%s). Good!', \
                        wanted_filename, sender=self)
            self.filename = wanted_filename
            self.save()
            self.db.commit()

        return os.path.join(self.channel.save_dir, self.filename)

    def set_mimetype(self, mimetype, commit=False):
        """Sets the mimetype for this episode"""
        self.mimetype = mimetype
        if commit:
            self.db.commit()

    def extension(self, may_call_local_filename=True):
        filename, ext = util.filename_from_url(self.url)
        if may_call_local_filename:
            filename = self.local_filename(create=False)
            if filename is not None:
                filename, ext = os.path.splitext(filename)
        # if we can't detect the extension from the url fallback on the mimetype
        if ext == '' or util.file_type_by_extension(ext) is None:
            ext = util.extension_from_mimetype(self.mimetype)
        return ext

    def check_is_new(self, downloading=lambda e: False):
        """
        Returns True if this episode is to be considered new.
        "Downloading" should be a callback that gets an episode
        as its parameter and returns True if the episode is
        being downloaded at the moment.
        """
        return self.state == gpodder.STATE_NORMAL and \
                not self.is_played and \
                not downloading(self)
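
    # Note (comment added for clarity): an episode stops being "new" when it
    # is played (mark_old() sets is_played), when its state leaves
    # STATE_NORMAL (download or delete), or while the "downloading" callback
    # reports it as queued.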

    def mark_new(self):
        self.state = gpodder.STATE_NORMAL
        self.is_played = False
        self.db.update_episode_state(self)

    def mark_old(self):
        self.is_played = True
        self.db.update_episode_state(self)

    def file_exists(self):
        filename = self.local_filename(create=False, check_only=True)
        if filename is None:
            return False
        else:
            return os.path.exists(filename)

    def was_downloaded(self, and_exists=False):
        if self.state != gpodder.STATE_DOWNLOADED:
            return False
        if and_exists and not self.file_exists():
            return False
        return True

    def sync_filename(self, use_custom=False, custom_format=None):
        if use_custom:
            return util.object_string_formatter(custom_format,
                    episode=self, podcast=self.channel)
        else:
            return self.title

    def file_type(self):
        # Assume all YouTube links are video files
        if youtube.is_video_link(self.url):
            return 'video'

        return util.file_type_by_extension(self.extension())

    @property
    def basename( self):
        return os.path.splitext( os.path.basename( self.url))[0]

    @property
    def published( self):
        """
        Returns published date as YYYYMMDD (or 00000000 if not available)
        """
        try:
            return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
        except:
            log( 'Cannot format pubDate for "%s".', self.title, sender = self)
            return '00000000'

    @property
    def pubtime(self):
        """
        Returns published time as HHMM (or 0000 if not available)
        """
        try:
            return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
        except:
            log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
            return '0000'

    def cute_pubdate(self):
        result = util.format_date(self.pubDate)
        if result is None:
            return '(%s)' % _('unknown')
        else:
            return result

    pubdate_prop = property(fget=cute_pubdate)

    def calculate_filesize( self):
        filename = self.local_filename(create=False)
        if filename is None:
            log('calculate_filesize called, but filename is None!', sender=self)
        try:
            self.length = os.path.getsize(filename)
        except:
            log( 'Could not get filesize for %s.', self.url)

    def get_filesize_string(self):
        return util.format_filesize(self.length)

    filesize_prop = property(fget=get_filesize_string)

    def get_played_string( self):
        if not self.is_played:
            return _('Unplayed')

        return ''

    played_prop = property(fget=get_played_string)

    def is_duplicate(self, episode):
        if self.title == episode.title and self.pubDate == episode.pubDate:
            log('Possible duplicate detected: %s', self.title)
            return True
        return False

    def duplicate_id(self):
        return hash((self.title, self.pubDate))

    def update_from(self, episode):
        for k in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
            setattr(self, k, getattr(episode, k))