Sort entry list before cutting it off (bug 1186)
[gpodder.git] / src / gpodder / model.py
blob33c8fc8a459c08a9f216a0960b0a62d2a46615e7
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # gpodder.model - Core model classes for gPodder (2009-08-13)
23 # Based on libpodcasts.py (thp, 2005-10-29)
26 import gpodder
27 from gpodder import util
28 from gpodder import feedcore
29 from gpodder import youtube
30 from gpodder import corestats
31 from gpodder import gstreamer
33 from gpodder.liblogger import log
35 import os
36 import re
37 import glob
38 import shutil
39 import time
40 import datetime
41 import rfc822
42 import hashlib
43 import feedparser
44 import xml.sax.saxutils
46 _ = gpodder.gettext
# Exception-as-result: raised by gPodderFetcher.fetch_channel when one of
# the registered custom handlers takes over processing of a feed URL
class CustomFeed(feedcore.ExceptionWithData): pass
class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder and provides a
    convenience method (fetch_channel) for use by PodcastChannel objects.
    """
    # Handlers registered via register(); each may claim a URL entirely
    # (e.g. video-site handlers) instead of the normal feed download path
    custom_handlers = []

    def __init__(self):
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        # Fetch the feed for "channel"; the result is reported by feedcore
        # via exceptions (UpdatedFeed, NotModified, NewLocation, ...)
        etag = channel.etag
        modified = feedparser._parse_date(channel.last_modified)
        # If we have a username or password, rebuild the url with them included
        # Note: using a HTTPBasicAuthHandler would be pain because we need to
        # know the realm. It can be done, but I think this method works, too
        url = channel.authenticate_url(channel.url)
        for handler in self.custom_handlers:
            custom_feed = handler.handle_url(url)
            if custom_feed is not None:
                # A custom handler claimed this URL - short-circuit the
                # normal feed download with a CustomFeed "result exception"
                raise CustomFeed(custom_feed)
        self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        # Hook called by feedcore.Fetcher to rewrite URLs before fetching
        # (maps YouTube user pages to their real feed URL)
        return youtube.get_real_channel_url(url)

    @classmethod
    def register(cls, handler):
        # Register a custom per-URL feed handler for all fetcher instances
        cls.custom_handlers.append(handler)
# Example of adding a proxy handler (kept for reference, not active):
# def _get_handlers(self):
#     # Add a ProxyHandler for fetching data via a proxy server
#     proxies = {'http': 'http://proxy.example.org:8080'}
#     return [urllib2.ProxyHandler(proxies)]

# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register
class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        instance = cls(*args)
        instance.update_from_dict(d)
        return instance

    def update_from_dict(self, d):
        """
        Updates the attributes of this object with values from the
        dictionary "d" by using the keys found in "d".

        Keys that do not correspond to an existing attribute are
        silently ignored.
        """
        for key, value in d.items():
            if hasattr(self, key):
                setattr(self, key, value)
116 class PodcastChannel(PodcastModelObject):
117 """holds data for a complete channel"""
118 MAX_FOLDERNAME_LENGTH = 150
119 SECONDS_PER_WEEK = 7*24*60*60
121 feed_fetcher = gPodderFetcher()
123 @classmethod
124 def build_factory(cls, download_dir):
125 def factory(dict, db):
126 return cls.create_from_dict(dict, db, download_dir)
127 return factory
129 @classmethod
130 def load_from_db(cls, db, download_dir):
131 return db.load_channels(factory=cls.build_factory(download_dir))
133 @classmethod
134 def load(cls, db, url, create=True, authentication_tokens=None,\
135 max_episodes=0, download_dir=None, allow_empty_feeds=False, \
136 mimetype_prefs=''):
137 if isinstance(url, unicode):
138 url = url.encode('utf-8')
140 tmp = db.load_channels(factory=cls.build_factory(download_dir), url=url)
141 if len(tmp):
142 return tmp[0]
143 elif create:
144 tmp = PodcastChannel(db, download_dir)
145 tmp.url = url
146 if authentication_tokens is not None:
147 tmp.username = authentication_tokens[0]
148 tmp.password = authentication_tokens[1]
150 tmp.update(max_episodes, mimetype_prefs)
151 tmp.save()
152 db.force_last_new(tmp)
153 # Subscribing to empty feeds should yield an error (except if
154 # the user specifically allows empty feeds in the config UI)
155 if sum(tmp.get_statistics()) == 0 and not allow_empty_feeds:
156 tmp.delete()
157 raise Exception(_('No downloadable episodes in feed'))
158 return tmp
160 def episode_factory(self, d, db__parameter_is_unused=None):
162 This function takes a dictionary containing key-value pairs for
163 episodes and returns a new PodcastEpisode object that is connected
164 to this PodcastChannel object.
166 Returns: A new PodcastEpisode object
168 return PodcastEpisode.create_from_dict(d, self)
170 def _consume_custom_feed(self, custom_feed, max_episodes=0):
171 self.title = custom_feed.get_title()
172 self.link = custom_feed.get_link()
173 self.description = custom_feed.get_description()
174 self.image = custom_feed.get_image()
175 self.pubDate = time.time()
176 self.save()
178 guids = [episode.guid for episode in self.get_all_episodes()]
180 # Insert newly-found episodes into the database
181 custom_feed.get_new_episodes(self, guids)
183 self.save()
185 self.db.purge(max_episodes, self.id)
187 def _consume_updated_feed(self, feed, max_episodes=0, mimetype_prefs=''):
188 self.parse_error = feed.get('bozo_exception', None)
190 # Replace multi-space and newlines with single space (Maemo bug 11173)
191 self.title = re.sub('\s+', ' ', feed.feed.get('title', self.url))
193 self.link = feed.feed.get('link', self.link)
194 self.description = feed.feed.get('subtitle', self.description)
195 # Start YouTube-specific title FIX
196 YOUTUBE_PREFIX = 'Uploads by '
197 if self.title.startswith(YOUTUBE_PREFIX):
198 self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
199 # End YouTube-specific title FIX
201 try:
202 self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None+(0,)))
203 except:
204 self.pubDate = time.time()
206 if hasattr(feed.feed, 'image'):
207 for attribute in ('href', 'url'):
208 new_value = getattr(feed.feed.image, attribute, None)
209 if new_value is not None:
210 log('Found cover art in %s: %s', attribute, new_value)
211 self.image = new_value
213 if hasattr(feed.feed, 'icon'):
214 self.image = feed.feed.icon
216 self.save()
218 # Load all episodes to update them properly.
219 existing = self.get_all_episodes()
221 # We can limit the maximum number of entries that gPodder will parse
222 if max_episodes > 0 and len(feed.entries) > max_episodes:
223 # We have to sort the entries in descending chronological order,
224 # because if the feed lists items in ascending order and has >
225 # max_episodes old episodes, new episodes will not be shown.
226 # See also: gPodder Bug 1186
227 try:
228 entries = sorted(feed.entries, \
229 key=lambda x: x.get('updated_parsed', (0,)*9), \
230 reverse=True)[:max_episodes]
231 except Exception, e:
232 log('Could not sort episodes: %s', e, sender=self, traceback=True)
233 entries = feed.entries[:max_episodes]
234 else:
235 entries = feed.entries
237 # Title + PubDate hashes for existing episodes
238 existing_dupes = dict((e.duplicate_id(), e) for e in existing)
240 # GUID-based existing episode list
241 existing_guids = dict((e.guid, e) for e in existing)
243 # Get most recent pubDate of all episodes
244 last_pubdate = self.db.get_last_pubdate(self) or 0
246 # Search all entries for new episodes
247 for entry in entries:
248 try:
249 episode = PodcastEpisode.from_feedparser_entry(entry, self, mimetype_prefs)
250 if episode is not None and not episode.title:
251 episode.title, ext = os.path.splitext(os.path.basename(episode.url))
252 except Exception, e:
253 log('Cannot instantiate episode: %s. Skipping.', e, sender=self, traceback=True)
254 continue
256 if episode is None:
257 continue
259 # Detect (and update) existing episode based on GUIDs
260 existing_episode = existing_guids.get(episode.guid, None)
261 if existing_episode:
262 existing_episode.update_from(episode)
263 existing_episode.save()
264 continue
266 # Detect (and update) existing episode based on duplicate ID
267 existing_episode = existing_dupes.get(episode.duplicate_id(), None)
268 if existing_episode:
269 if existing_episode.is_duplicate(episode):
270 existing_episode.update_from(episode)
271 existing_episode.save()
272 continue
274 # Workaround for bug 340: If the episode has been
275 # published earlier than one week before the most
276 # recent existing episode, do not mark it as new.
277 if episode.pubDate < last_pubdate - self.SECONDS_PER_WEEK:
278 log('Episode with old date: %s', episode.title, sender=self)
279 episode.is_played = True
281 episode.save()
283 # Remove "unreachable" episodes - episodes that have not been
284 # downloaded and that the feed does not list as downloadable anymore
285 if self.id is not None:
286 seen_guids = set(e.guid for e in feed.entries if hasattr(e, 'guid'))
287 episodes_to_purge = (e for e in existing if \
288 e.state != gpodder.STATE_DOWNLOADED and \
289 e.guid not in seen_guids and e.guid is not None)
290 for episode in episodes_to_purge:
291 log('Episode removed from feed: %s (%s)', episode.title, \
292 episode.guid, sender=self)
293 self.db.delete_episode_by_guid(episode.guid, self.id)
295 # This *might* cause episodes to be skipped if there were more than
296 # max_episodes_per_feed items added to the feed between updates.
297 # The benefit is that it prevents old episodes from apearing as new
298 # in certain situations (see bug #340).
299 self.db.purge(max_episodes, self.id)
301 def update_channel_lock(self):
302 self.db.update_channel_lock(self)
304 def _update_etag_modified(self, feed):
305 self.updated_timestamp = time.time()
306 self.calculate_publish_behaviour()
307 self.etag = feed.headers.get('etag', self.etag)
308 self.last_modified = feed.headers.get('last-modified', self.last_modified)
310 def query_automatic_update(self):
311 """Query if this channel should be updated automatically
313 Returns True if the update should happen in automatic
314 mode or False if this channel should be skipped (timeout
315 not yet reached or release not expected right now).
317 updated = self.updated_timestamp
318 expected = self.release_expected
320 now = time.time()
321 one_day_ago = now - 60*60*24
322 lastcheck = now - 60*10
324 return updated < one_day_ago or \
325 (expected < now and updated < lastcheck)
327 def update(self, max_episodes=0, mimetype_prefs=''):
328 try:
329 self.feed_fetcher.fetch_channel(self)
330 except CustomFeed, updated:
331 custom_feed = updated.data
332 self._consume_custom_feed(custom_feed, max_episodes)
333 self.save()
334 except feedcore.UpdatedFeed, updated:
335 feed = updated.data
336 self._consume_updated_feed(feed, max_episodes, mimetype_prefs)
337 self._update_etag_modified(feed)
338 self.save()
339 except feedcore.NewLocation, updated:
340 feed = updated.data
341 self.url = feed.href
342 self._consume_updated_feed(feed, max_episodes, mimetype_prefs)
343 self._update_etag_modified(feed)
344 self.save()
345 except feedcore.NotModified, updated:
346 feed = updated.data
347 self._update_etag_modified(feed)
348 self.save()
349 except Exception, e:
350 # "Not really" errors
351 #feedcore.AuthenticationRequired
352 # Temporary errors
353 #feedcore.Offline
354 #feedcore.BadRequest
355 #feedcore.InternalServerError
356 #feedcore.WifiLogin
357 # Permanent errors
358 #feedcore.Unsubscribe
359 #feedcore.NotFound
360 #feedcore.InvalidFeed
361 #feedcore.UnknownStatusCode
362 raise
364 if gpodder.user_hooks is not None:
365 gpodder.user_hooks.on_podcast_updated(self)
367 self.db.commit()
369 def delete(self):
370 self.db.delete_channel(self)
372 def save(self):
373 if gpodder.user_hooks is not None:
374 gpodder.user_hooks.on_podcast_save(self)
375 if self.foldername is None:
376 # get_save_dir() finds a unique value for foldername
377 self.get_save_dir()
378 self.db.save_channel(self)
380 def get_statistics(self):
381 if self.id is None:
382 return (0, 0, 0, 0, 0)
383 else:
384 return self.db.get_channel_count(int(self.id))
386 def authenticate_url(self, url):
387 return util.url_add_authentication(url, self.username, self.password)
389 def __init__(self, db, download_dir):
390 self.db = db
391 self.download_dir = download_dir
392 self.id = None
393 self.url = None
394 self.title = ''
395 self.link = ''
396 self.description = ''
397 self.image = None
398 self.pubDate = 0
399 self.parse_error = None
400 self.foldername = None
401 self.auto_foldername = 1 # automatically generated foldername
403 # should this channel be synced to devices? (ex: iPod)
404 self.sync_to_devices = True
405 # to which playlist should be synced
406 self.device_playlist_name = 'gPodder'
407 # if set, this overrides the channel-provided title
408 self.override_title = ''
409 self.username = ''
410 self.password = ''
412 self.last_modified = None
413 self.etag = None
415 self.save_dir_size = 0
416 self.__save_dir_size_set = False
418 self.channel_is_locked = False
420 self.release_expected = time.time()
421 self.release_deviation = 0
422 self.updated_timestamp = 0
423 self.feed_update_enabled = True
425 def calculate_publish_behaviour(self):
426 episodes = self.db.load_episodes(self, factory=self.episode_factory, limit=30)
427 if len(episodes) < 3:
428 return
430 deltas = []
431 latest = max(e.pubDate for e in episodes)
432 for index in range(len(episodes)-1):
433 if episodes[index].pubDate != 0 and episodes[index+1].pubDate != 0:
434 deltas.append(episodes[index].pubDate - episodes[index+1].pubDate)
436 if len(deltas) > 1:
437 stats = corestats.Stats(deltas)
438 self.release_expected = min([latest+stats.stdev(), latest+(stats.min()+stats.avg())*.5])
439 self.release_deviation = stats.stdev()
440 else:
441 self.release_expected = latest
442 self.release_deviation = 0
444 def request_save_dir_size(self):
445 if not self.__save_dir_size_set:
446 self.update_save_dir_size()
447 self.__save_dir_size_set = True
449 def update_save_dir_size(self):
450 self.save_dir_size = util.calculate_size(self.save_dir)
452 def get_title( self):
453 if self.override_title:
454 return self.override_title
455 elif not self.__title.strip():
456 return self.url
457 else:
458 return self.__title
460 def set_title( self, value):
461 self.__title = value.strip()
463 title = property(fget=get_title,
464 fset=set_title)
466 def set_custom_title( self, custom_title):
467 custom_title = custom_title.strip()
469 # if the custom title is the same as we have
470 if custom_title == self.override_title:
471 return
473 # if custom title is the same as channel title and we didn't have a custom title
474 if custom_title == self.__title and self.override_title == '':
475 return
477 # make sure self.foldername is initialized
478 self.get_save_dir()
480 # rename folder if custom_title looks sane
481 new_folder_name = self.find_unique_folder_name(custom_title)
482 if len(new_folder_name) > 0 and new_folder_name != self.foldername:
483 log('Changing foldername based on custom title: %s', custom_title, sender=self)
484 new_folder = os.path.join(self.download_dir, new_folder_name)
485 old_folder = os.path.join(self.download_dir, self.foldername)
486 if os.path.exists(old_folder):
487 if not os.path.exists(new_folder):
488 # Old folder exists, new folder does not -> simply rename
489 log('Renaming %s => %s', old_folder, new_folder, sender=self)
490 os.rename(old_folder, new_folder)
491 else:
492 # Both folders exist -> move files and delete old folder
493 log('Moving files from %s to %s', old_folder, new_folder, sender=self)
494 for file in glob.glob(os.path.join(old_folder, '*')):
495 shutil.move(file, new_folder)
496 log('Removing %s', old_folder, sender=self)
497 shutil.rmtree(old_folder, ignore_errors=True)
498 self.foldername = new_folder_name
499 self.save()
501 if custom_title != self.__title:
502 self.override_title = custom_title
503 else:
504 self.override_title = ''
506 def get_downloaded_episodes(self):
507 return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)
509 def get_new_episodes(self, downloading=lambda e: False):
511 Get a list of new episodes. You can optionally specify
512 "downloading" as a callback that takes an episode as
513 a parameter and returns True if the episode is currently
514 being downloaded or False if not.
516 By default, "downloading" is implemented so that it
517 reports all episodes as not downloading.
519 return [episode for episode in self.db.load_episodes(self, \
520 factory=self.episode_factory, state=gpodder.STATE_NORMAL) if \
521 episode.check_is_new(downloading=downloading)]
523 def get_playlist_filename(self):
524 # If the save_dir doesn't end with a slash (which it really should
525 # not, if the implementation is correct, we can just append .m3u :)
526 assert self.save_dir[-1] != '/'
527 return self.save_dir+'.m3u'
529 def update_m3u_playlist(self):
530 m3u_filename = self.get_playlist_filename()
532 downloaded_episodes = self.get_downloaded_episodes()
533 if not downloaded_episodes:
534 log('No episodes - removing %s', m3u_filename, sender=self)
535 util.delete_file(m3u_filename)
536 return
538 log('Writing playlist to %s', m3u_filename, sender=self)
539 util.write_m3u_playlist(m3u_filename, \
540 PodcastEpisode.sort_by_pubdate(downloaded_episodes))
542 def get_episode_by_url(self, url):
543 return self.db.load_single_episode(self, \
544 factory=self.episode_factory, url=url)
546 def get_episode_by_filename(self, filename):
547 return self.db.load_single_episode(self, \
548 factory=self.episode_factory, filename=filename)
550 def get_all_episodes(self):
551 return self.db.load_episodes(self, factory=self.episode_factory)
553 def find_unique_folder_name(self, foldername):
554 # Remove trailing dots to avoid errors on Windows (bug 600)
555 foldername = foldername.strip().rstrip('.')
557 current_try = util.sanitize_filename(foldername, \
558 self.MAX_FOLDERNAME_LENGTH)
559 next_try_id = 2
561 while True:
562 if self.db.channel_foldername_exists(current_try):
563 current_try = '%s (%d)' % (foldername, next_try_id)
564 next_try_id += 1
565 else:
566 return current_try
568 def get_save_dir(self):
569 urldigest = hashlib.md5(self.url).hexdigest()
570 sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
571 if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
572 # we must change the folder name, because it has not been set manually
573 fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)
575 # if this is an empty string, try the basename
576 if len(fn_template) == 0:
577 log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
578 fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)
580 # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
581 if len(fn_template) == 0:
582 log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
583 fn_template = urldigest # no need for sanitize_filename here
585 # Find a unique folder name for this podcast
586 wanted_foldername = self.find_unique_folder_name(fn_template)
588 # if the foldername has not been set, check if the (old) md5 filename exists
589 if self.foldername is None and os.path.exists(os.path.join(self.download_dir, urldigest)):
590 log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
591 self.foldername = urldigest
593 # we have a valid, new folder name in "current_try" -> use that!
594 if self.foldername is not None and wanted_foldername != self.foldername:
595 # there might be an old download folder crawling around - move it!
596 new_folder_name = os.path.join(self.download_dir, wanted_foldername)
597 old_folder_name = os.path.join(self.download_dir, self.foldername)
598 if os.path.exists(old_folder_name):
599 if not os.path.exists(new_folder_name):
600 # Old folder exists, new folder does not -> simply rename
601 log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
602 os.rename(old_folder_name, new_folder_name)
603 else:
604 # Both folders exist -> move files and delete old folder
605 log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
606 for file in glob.glob(os.path.join(old_folder_name, '*')):
607 shutil.move(file, new_folder_name)
608 log('Removing %s', old_folder_name, sender=self)
609 shutil.rmtree(old_folder_name, ignore_errors=True)
610 log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
611 self.foldername = wanted_foldername
612 self.save()
614 save_dir = os.path.join(self.download_dir, self.foldername)
616 # Create save_dir if it does not yet exist
617 if not util.make_directory( save_dir):
618 log( 'Could not create save_dir: %s', save_dir, sender = self)
620 return save_dir
622 save_dir = property(fget=get_save_dir)
624 def remove_downloaded(self):
625 # Remove the playlist file if it exists
626 m3u_filename = self.get_playlist_filename()
627 if os.path.exists(m3u_filename):
628 util.delete_file(m3u_filename)
630 # Remove the download directory
631 shutil.rmtree(self.save_dir, True)
633 @property
634 def cover_file(self):
635 new_name = os.path.join(self.save_dir, 'folder.jpg')
636 if not os.path.exists(new_name):
637 old_names = ('cover', '.cover')
638 for old_name in old_names:
639 filename = os.path.join(self.save_dir, old_name)
640 if os.path.exists(filename):
641 shutil.move(filename, new_name)
642 return new_name
644 return new_name
646 def delete_episode(self, episode):
647 filename = episode.local_filename(create=False, check_only=True)
648 if filename is not None:
649 util.delete_file(filename)
651 episode.set_state(gpodder.STATE_DELETED)
654 class PodcastEpisode(PodcastModelObject):
655 """holds data for one object in a channel"""
656 MAX_FILENAME_LENGTH = 200
    def _get_played(self):
        return self.is_played

    def _set_played(self, played):
        self.is_played = played

    # Alias "is_played" to "played" for DB column mapping
    played = property(fget=_get_played, fset=_set_played)
    def _get_locked(self):
        return self.is_locked

    def _set_locked(self, locked):
        self.is_locked = locked

    # Alias "is_locked" to "locked" for DB column mapping
    locked = property(fget=_get_locked, fset=_set_locked)
    def _get_channel_id(self):
        return self.channel.id

    def _set_channel_id(self, channel_id):
        # The channel id is owned by the parent channel; the DB layer may
        # only "set" the value the episode already has
        assert self.channel.id == channel_id

    # Accessor for the "channel_id" DB column
    channel_id = property(fget=_get_channel_id, fset=_set_channel_id)
685 @staticmethod
686 def sort_by_pubdate(episodes, reverse=False):
687 """Sort a list of PodcastEpisode objects chronologically
689 Returns a iterable, sorted sequence of the episodes
691 key_pubdate = lambda e: e.pubDate
692 return sorted(episodes, key=key_pubdate, reverse=reverse)
    def reload_from_db(self):
        """
        Re-reads all episode details for this object from the
        database and updates this object accordingly. Can be
        used to refresh existing objects when the database has
        been updated (e.g. the filename has been set after a
        download where it was not set before the download)
        """
        d = self.db.load_episode(self.id)
        self.update_from_dict(d or {})
        return self
706 def has_website_link(self):
707 return bool(self.link) and (self.link != self.url or \
708 youtube.is_video_link(self.link))
    @staticmethod
    def from_feedparser_entry(entry, channel, mimetype_prefs=''):
        """Create a new episode from a feedparser entry

        Fills in title, description, guid, pubDate and then tries, in
        order: enclosures (preferring mime types listed first in
        mimetype_prefs), Media RSS content, plain links that look like
        audio/video files, and finally MP3 URLs found in the entry's
        HTML content. Returns None if no downloadable URL was found.
        """
        episode = PodcastEpisode(channel)

        # Replace multi-space and newlines with single space (Maemo bug 11173)
        episode.title = re.sub('\s+', ' ', entry.get('title', ''))
        episode.link = entry.get('link', '')
        if 'content' in entry and len(entry['content']) and \
                entry['content'][0].type == 'text/html':
            episode.description = entry['content'][0].value
        else:
            episode.description = entry.get('summary', '')

        try:
            # Parse iTunes-specific podcast duration metadata
            total_time = util.parse_time(entry.get('itunes_duration', ''))
            episode.total_time = total_time
        except:
            pass

        # Fallback to subtitle if summary is not available
        if not episode.description:
            episode.description = entry.get('subtitle', '')

        episode.guid = entry.get('id', '')
        if entry.get('updated_parsed', None):
            # mktime_tz expects a 10-tuple; append a zero UTC offset
            episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

        enclosures = entry.get('enclosures', ())
        audio_available = any(e.get('type', '').startswith('audio/') \
                for e in enclosures)
        video_available = any(e.get('type', '').startswith('video/') \
                for e in enclosures)

        # Create the list of preferred mime types
        mimetype_prefs = mimetype_prefs.split(',')

        def calculate_preference_value(enclosure):
            """Calculate preference value of an enclosure

            This is based on mime types and allows users to prefer
            certain mime types over others (e.g. MP3 over AAC, ...)
            """
            mimetype = enclosure.get('type', None)
            try:
                # If the mime type is found, return its (zero-based) index
                return mimetype_prefs.index(mimetype)
            except ValueError:
                # If it is not found, assume it comes after all listed items
                return len(mimetype_prefs)

        # Enclosures
        for e in sorted(enclosures, key=calculate_preference_value):
            episode.mimetype = e.get('type', 'application/octet-stream')
            if episode.mimetype == '':
                # See Maemo bug 10036
                log('Fixing empty mimetype in ugly feed', sender=episode)
                episode.mimetype = 'application/octet-stream'

            if '/' not in episode.mimetype:
                continue

            # Skip images in feeds if audio or video is available (bug 979)
            if episode.mimetype.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(e.length) or -1
            except:
                episode.length = -1

            return episode

        # Media RSS content
        for m in entry.get('media_content', ()):
            episode.mimetype = m.get('type', 'application/octet-stream')
            if '/' not in episode.mimetype:
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(m.fileSize) or -1
            except:
                episode.length = -1

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if youtube.is_video_link(episode.url):
                return episode

            # Check if we can resolve this link to a audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to a audio or video file - use it!
            if file_type is not None:
                return episode

        # Scan MP3 links in description text
        mp3s = re.compile(r'http://[^"]*\.mp3')
        for content in entry.get('content', ()):
            html = content.value
            for match in mp3s.finditer(html):
                episode.url = match.group(0)
                return episode

        return None
    def __init__(self, channel):
        """Create a new, empty episode belonging to "channel"."""
        self.db = channel.db
        # Used by Storage for faster saving
        self.id = None
        self.url = ''
        self.title = ''
        self.length = 0              # enclosure/file size in bytes
        self.mimetype = 'application/octet-stream'
        self.guid = ''
        self.description = ''
        self.link = ''
        self.channel = channel
        self.pubDate = 0             # publication time (unix timestamp)
        self.filename = None
        self.auto_filename = 1 # automatically generated filename

        self.state = gpodder.STATE_NORMAL
        self.is_played = False

        # Initialize the "is_locked" property
        self._is_locked = False
        self.is_locked = channel.channel_is_locked

        # Time attributes
        self.total_time = 0              # total length in seconds
        self.current_position = 0        # last playback position in seconds
        self.current_position_updated = 0  # time the position was last saved
    def get_is_locked(self):
        return self._is_locked

    def set_is_locked(self, is_locked):
        # Normalize to a real boolean before storing
        self._is_locked = bool(is_locked)

    is_locked = property(fget=get_is_locked, fset=set_is_locked)
    def save(self):
        """Persist this episode to the database (runs save hooks)."""
        if self.state != gpodder.STATE_DOWNLOADED and self.file_exists():
            # The file is already on disk - fix up the state accordingly
            self.state = gpodder.STATE_DOWNLOADED
        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_episode_save(self)
        self.db.save_episode(self)
    def on_downloaded(self, filename):
        """Called when the download to "filename" has finished

        Updates state and file size; if the feed did not provide a
        duration, tries to detect it from the file via gstreamer.
        """
        self.state = gpodder.STATE_DOWNLOADED
        self.is_played = False
        self.length = os.path.getsize(filename)

        if not self.total_time:
            try:
                length = gstreamer.get_track_length(filename)
                if length is not None:
                    length = int(length/1000)   # milliseconds -> seconds
                    log('Detected media length: %d seconds', length, \
                            sender=self)
                    self.total_time = length
                    self.db.save_episode(self)
                    self.db.commit()
                    return
            except Exception, e:
                # Length detection is best-effort only
                log('Error while detecting media length: %s', str(e), \
                        sender=self)

        self.db.save_downloaded_episode(self)
        self.db.commit()
    def set_state(self, state):
        """Set the episode state and persist it immediately."""
        self.state = state
        self.db.update_episode_state(self)
905 def mark(self, state=None, is_played=None, is_locked=None):
906 if state is not None:
907 self.state = state
908 if is_played is not None:
909 self.is_played = is_played
910 if is_locked is not None:
911 self.is_locked = is_locked
912 self.db.update_episode_state(self)
914 @property
915 def title_markup(self):
916 return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
917 xml.sax.saxutils.escape(self.channel.title))
    @property
    def maemo_markup(self):
        # Pango markup for the Maemo episode list: bold title plus
        # file size (if known), release date and channel name
        if self.length > 0:
            length_str = '%s; ' % self.filesize_prop
        else:
            length_str = ''
        return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(re.sub('\s+', ' ', self.title)), \
                xml.sax.saxutils.escape(length_str), \
                xml.sax.saxutils.escape(self.pubdate_prop), \
                xml.sax.saxutils.escape(re.sub('\s+', ' ', self.channel.title)))
    @property
    def maemo_remove_markup(self):
        # Pango markup for the Maemo "remove episodes" list: title plus
        # size, playback status, download age and channel name
        if self.total_time and self.current_position:
            played_string = self.get_play_info_string()
        elif self.is_played:
            played_string = _('played')
        else:
            played_string = _('unplayed')
        downloaded_string = self.get_age_string()
        if not downloaded_string:
            downloaded_string = _('today')
        return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(self.title), \
                xml.sax.saxutils.escape(self.filesize_prop), \
                xml.sax.saxutils.escape(played_string), \
                xml.sax.saxutils.escape(downloaded_string), \
                xml.sax.saxutils.escape(self.channel.title))
    def age_in_days(self):
        # Age (in days) of the downloaded file, based on its mtime
        return util.file_age_in_days(self.local_filename(create=False, \
                check_only=True))

    age_int_prop = property(fget=age_in_days)
    def get_age_string(self):
        # Human-readable version of age_in_days()
        return util.file_age_to_string(self.age_in_days())

    age_prop = property(fget=get_age_string)
    def one_line_description( self):
        # Return the description collapsed to a single line of plain
        # text (HTML stripped), or a placeholder if none is available
        lines = util.remove_html_tags(self.description or '').strip().splitlines()
        if not lines or lines[0] == '':
            return _('No description available')
        else:
            return ' '.join(lines)
    def delete_from_disk(self):
        """Delete the downloaded file for this episode (best-effort)."""
        try:
            self.channel.delete_episode(self)
        except:
            log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)
975 def find_unique_file_name(self, url, filename, extension):
976 current_try = util.sanitize_filename(filename, self.MAX_FILENAME_LENGTH)+extension
977 next_try_id = 2
978 lookup_url = None
980 if self.filename == current_try and current_try is not None:
981 # We already have this filename - good!
982 return current_try
984 while self.db.episode_filename_exists(current_try):
985 current_try = '%s (%d)%s' % (filename, next_try_id, extension)
986 next_try_id += 1
988 return current_try
    def local_filename(self, create, force_update=False, check_only=False,
            template=None):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if would be a good idea. Use this if you
        only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.
        """
        # Extension from URL/mimetype only - may_call_local_filename=False
        # avoids infinite recursion back into this method
        ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

        # For compatibility with already-downloaded episodes, we
        # have to know md5 filenames if they are downloaded already
        urldigest = hashlib.md5(self.url).hexdigest()

        if not create and self.filename is None:
            urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
            if os.path.exists(urldigest_filename):
                # The file exists, so set it up in our database
                log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
                self.filename = urldigest+ext
                self.auto_filename = 1
                self.save()
                return urldigest_filename
            return None

        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if check_only:
            if self.filename is None:
                return None
            else:
                return os.path.join(self.channel.save_dir, self.filename)

        # Regenerate only if there is no filename yet, the caller forces
        # it, or the current filename is an auto-chosen md5 digest
        if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, extension_UNUSED = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                log('Looks like a redirection to me: %s', self.url, sender=self)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                log('Redirection resolved to: %s', url, sender=self)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use the video title for YouTube downloads
            for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
                if self.url.startswith(yt_url):
                    fn_template = util.sanitize_filename(os.path.basename(self.title), self.MAX_FILENAME_LENGTH)

            # If the basename is empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0 or fn_template.startswith('redirect.'):
                log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
                fn_template = urldigest

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

            # We populate the filename field the first time - does the old file still exist?
            if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
                log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
                self.filename = urldigest+ext

            # The old file exists, but we have decided to want a different filename
            if self.filename is not None and wanted_filename != self.filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    log('Choosing new filename: %s', new_file_name, sender=self)
                else:
                    log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
                log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
            elif self.filename is None:
                log('Setting filename to "%s".', wanted_filename, sender=self)
            else:
                log('Should update filename. Stays the same (%s). Good!', \
                        wanted_filename, sender=self)
            self.filename = wanted_filename
            self.save()
            self.db.commit()

        return os.path.join(self.channel.save_dir, self.filename)
1106 def set_mimetype(self, mimetype, commit=False):
1107 """Sets the mimetype for this episode"""
1108 self.mimetype = mimetype
1109 if commit:
1110 self.db.commit()
1112 def extension(self, may_call_local_filename=True):
1113 filename, ext = util.filename_from_url(self.url)
1114 if may_call_local_filename:
1115 filename = self.local_filename(create=False)
1116 if filename is not None:
1117 filename, ext = os.path.splitext(filename)
1118 # if we can't detect the extension from the url fallback on the mimetype
1119 if ext == '' or util.file_type_by_extension(ext) is None:
1120 ext = util.extension_from_mimetype(self.mimetype)
1121 return ext
1123 def check_is_new(self, downloading=lambda e: False):
1125 Returns True if this episode is to be considered new.
1126 "Downloading" should be a callback that gets an episode
1127 as its parameter and returns True if the episode is
1128 being downloaded at the moment.
1130 return self.state == gpodder.STATE_NORMAL and \
1131 not self.is_played and \
1132 not downloading(self)
1134 def mark_new(self):
1135 self.state = gpodder.STATE_NORMAL
1136 self.is_played = False
1137 self.db.update_episode_state(self)
1139 def mark_old(self):
1140 self.is_played = True
1141 self.db.update_episode_state(self)
1143 def file_exists(self):
1144 filename = self.local_filename(create=False, check_only=True)
1145 if filename is None:
1146 return False
1147 else:
1148 return os.path.exists(filename)
1150 def was_downloaded(self, and_exists=False):
1151 if self.state != gpodder.STATE_DOWNLOADED:
1152 return False
1153 if and_exists and not self.file_exists():
1154 return False
1155 return True
1157 def sync_filename(self, use_custom=False, custom_format=None):
1158 if use_custom:
1159 return util.object_string_formatter(custom_format,
1160 episode=self, podcast=self.channel)
1161 else:
1162 return self.title
1164 def file_type(self):
1165 # Assume all YouTube links are video files
1166 if youtube.is_video_link(self.url):
1167 return 'video'
1169 return util.file_type_by_extension(self.extension())
1171 @property
1172 def basename( self):
1173 return os.path.splitext( os.path.basename( self.url))[0]
1175 @property
1176 def published( self):
1178 Returns published date as YYYYMMDD (or 00000000 if not available)
1180 try:
1181 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
1182 except:
1183 log( 'Cannot format pubDate for "%s".', self.title, sender = self)
1184 return '00000000'
1186 @property
1187 def pubtime(self):
1189 Returns published time as HHMM (or 0000 if not available)
1191 try:
1192 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
1193 except:
1194 log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
1195 return '0000'
1197 def playlist_title(self):
1198 """Return a title for this episode in a playlist
1200 The title will be composed of the podcast name, the
1201 episode name and the publication date. The return
1202 value is the canonical representation of this episode
1203 in playlists (for example, M3U playlists).
1205 return '%s - %s (%s)' % (self.channel.title, \
1206 self.title, \
1207 self.cute_pubdate())
1209 def cute_pubdate(self):
1210 result = util.format_date(self.pubDate)
1211 if result is None:
1212 return '(%s)' % _('unknown')
1213 else:
1214 return result
1216 pubdate_prop = property(fget=cute_pubdate)
1218 def calculate_filesize( self):
1219 filename = self.local_filename(create=False)
1220 if filename is None:
1221 log('calculate_filesized called, but filename is None!', sender=self)
1222 try:
1223 self.length = os.path.getsize(filename)
1224 except:
1225 log( 'Could not get filesize for %s.', self.url)
1227 def is_finished(self):
1228 """Return True if this episode is considered "finished playing"
1230 An episode is considered "finished" when there is a
1231 current position mark on the track, and when the
1232 current position is greater than 99 percent of the
1233 total time or inside the last 10 seconds of a track.
1235 return self.current_position > 0 and \
1236 (self.current_position + 10 >= self.total_time or \
1237 self.current_position >= self.total_time*.99)
1239 def get_play_info_string(self):
1240 if self.is_finished():
1241 return '%s (%s)' % (_('Finished'), self.get_duration_string(),)
1242 if self.current_position > 0:
1243 return '%s / %s' % (self.get_position_string(), \
1244 self.get_duration_string())
1245 else:
1246 return self.get_duration_string()
1248 def get_position_string(self):
1249 return util.format_time(self.current_position)
1251 def get_duration_string(self):
1252 return util.format_time(self.total_time)
1254 def get_filesize_string(self):
1255 return util.format_filesize(self.length)
1257 filesize_prop = property(fget=get_filesize_string)
1259 def get_played_string( self):
1260 if not self.is_played:
1261 return _('Unplayed')
1263 return ''
1265 played_prop = property(fget=get_played_string)
1267 def is_duplicate(self, episode):
1268 if self.title == episode.title and self.pubDate == episode.pubDate:
1269 log('Possible duplicate detected: %s', self.title)
1270 return True
1271 return False
1273 def duplicate_id(self):
1274 return hash((self.title, self.pubDate))
1276 def update_from(self, episode):
1277 for k in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
1278 setattr(self, k, getattr(episode, k))