Use <content:encoded> from feeds (bug 1144)
[gpodder.git] / src / gpodder / model.py
blob9f9264843f7bb85f2ad9d3534582dd1b44cbc5f1
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # gpodder.model - Core model classes for gPodder (2009-08-13)
23 # Based on libpodcasts.py (thp, 2005-10-29)
26 import gpodder
27 from gpodder import util
28 from gpodder import feedcore
29 from gpodder import youtube
30 from gpodder import corestats
31 from gpodder import gstreamer
33 from gpodder.liblogger import log
35 import os
36 import re
37 import glob
38 import shutil
39 import time
40 import datetime
41 import rfc822
42 import hashlib
43 import feedparser
44 import xml.sax.saxutils
46 _ = gpodder.gettext
# Raised by gPodderFetcher when a registered custom handler claims a URL;
# carries the handler's feed object as exception data.
class CustomFeed(feedcore.ExceptionWithData): pass
class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder and provides a
    convenience method (fetch_channel) for use by PodcastChannel objects.
    """
    # Handlers registered via register(); each may claim a URL and provide
    # a custom (non-feedparser) feed implementation for it.
    custom_handlers = []

    def __init__(self):
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        """Fetch the feed for "channel".

        Results are reported via feedcore's exception mechanism; raises
        CustomFeed if one of the registered handlers claims the URL.
        """
        etag = channel.etag
        modified = feedparser._parse_date(channel.last_modified)
        # If we have a username or password, rebuild the url with them included
        # Note: using a HTTPBasicAuthHandler would be pain because we need to
        # know the realm. It can be done, but I think this method works, too
        url = channel.authenticate_url(channel.url)
        for handler in self.custom_handlers:
            custom_feed = handler.handle_url(url)
            if custom_feed is not None:
                raise CustomFeed(custom_feed)
        self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        # Map YouTube channel/user pages to their real feed URL
        return youtube.get_real_channel_url(url)

    @classmethod
    def register(cls, handler):
        # Add a custom URL handler (shared by all fetcher instances)
        cls.custom_handlers.append(handler)

#    def _get_handlers(self):
#        # Add a ProxyHandler for fetching data via a proxy server
#        proxies = {'http': 'http://proxy.example.org:8080'}
#        return[urllib2.ProxyHandler(proxies))]

# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register
class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        o = cls(*args)
        o.update_from_dict(d)
        return o

    def update_from_dict(self, d):
        """
        Updates the attributes of this object with values from the
        dictionary "d" by using the keys found in "d".

        Keys in "d" that do not correspond to an existing attribute
        of this object are silently ignored.
        """
        for k in d:
            if hasattr(self, k):
                setattr(self, k, d[k])
class PodcastChannel(PodcastModelObject):
    """holds data for a complete channel"""
    # Maximum length of the on-disk folder name for a channel
    MAX_FOLDERNAME_LENGTH = 150
    SECONDS_PER_WEEK = 7*24*60*60

    # Shared feed fetcher instance used by all channels
    feed_fetcher = gPodderFetcher()

    @classmethod
    def build_factory(cls, download_dir):
        # Build a factory callable suitable for db.load_channels()
        def factory(dict, db):
            return cls.create_from_dict(dict, db, download_dir)
        return factory

    @classmethod
    def load_from_db(cls, db, download_dir):
        # Load all subscribed channels from the database
        return db.load_channels(factory=cls.build_factory(download_dir))

    @classmethod
    def load(cls, db, url, create=True, authentication_tokens=None,\
            max_episodes=0, download_dir=None, allow_empty_feeds=False, \
            mimetype_prefs=''):
        """Load the channel for "url" from the database.

        If it is not in the database and create is True, subscribe to
        it: create the channel, update it once and save it. Subscribing
        to a feed without downloadable episodes raises an exception
        unless allow_empty_feeds is True.
        """
        if isinstance(url, unicode):
            url = url.encode('utf-8')

        tmp = db.load_channels(factory=cls.build_factory(download_dir), url=url)
        if len(tmp):
            return tmp[0]
        elif create:
            tmp = PodcastChannel(db, download_dir)
            tmp.url = url
            if authentication_tokens is not None:
                tmp.username = authentication_tokens[0]
                tmp.password = authentication_tokens[1]

            tmp.update(max_episodes, mimetype_prefs)
            tmp.save()
            db.force_last_new(tmp)
            # Subscribing to empty feeds should yield an error (except if
            # the user specifically allows empty feeds in the config UI)
            if sum(tmp.get_statistics()) == 0 and not allow_empty_feeds:
                tmp.delete()
                raise Exception(_('No downloadable episodes in feed'))
            return tmp

    def episode_factory(self, d, db__parameter_is_unused=None):
        """
        This function takes a dictionary containing key-value pairs for
        episodes and returns a new PodcastEpisode object that is connected
        to this PodcastChannel object.

        Returns: A new PodcastEpisode object
        """
        return PodcastEpisode.create_from_dict(d, self)
    def _consume_custom_feed(self, custom_feed, max_episodes=0):
        """Merge metadata and episodes from a custom feed handler."""
        self.title = custom_feed.get_title()
        self.link = custom_feed.get_link()
        self.description = custom_feed.get_description()
        self.image = custom_feed.get_image()
        # Custom feeds carry no publish date of their own; use "now"
        self.pubDate = time.time()
        self.save()

        guids = [episode.guid for episode in self.get_all_episodes()]

        # Insert newly-found episodes into the database
        custom_feed.get_new_episodes(self, guids)

        self.save()

        self.db.purge(max_episodes, self.id)
187 def _consume_updated_feed(self, feed, max_episodes=0, mimetype_prefs=''):
188 self.parse_error = feed.get('bozo_exception', None)
190 # Replace multi-space and newlines with single space (Maemo bug 11173)
191 self.title = re.sub('\s+', ' ', feed.feed.get('title', self.url))
193 self.link = feed.feed.get('link', self.link)
194 self.description = feed.feed.get('subtitle', self.description)
195 # Start YouTube-specific title FIX
196 YOUTUBE_PREFIX = 'Uploads by '
197 if self.title.startswith(YOUTUBE_PREFIX):
198 self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
199 # End YouTube-specific title FIX
201 try:
202 self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None+(0,)))
203 except:
204 self.pubDate = time.time()
206 if hasattr(feed.feed, 'image'):
207 for attribute in ('href', 'url'):
208 new_value = getattr(feed.feed.image, attribute, None)
209 if new_value is not None:
210 log('Found cover art in %s: %s', attribute, new_value)
211 self.image = new_value
213 if hasattr(feed.feed, 'icon'):
214 self.image = feed.feed.icon
216 self.save()
218 # Load all episodes to update them properly.
219 existing = self.get_all_episodes()
221 # We can limit the maximum number of entries that gPodder will parse
222 if max_episodes > 0 and len(feed.entries) > max_episodes:
223 entries = feed.entries[:max_episodes]
224 else:
225 entries = feed.entries
227 # Title + PubDate hashes for existing episodes
228 existing_dupes = dict((e.duplicate_id(), e) for e in existing)
230 # GUID-based existing episode list
231 existing_guids = dict((e.guid, e) for e in existing)
233 # Get most recent pubDate of all episodes
234 last_pubdate = self.db.get_last_pubdate(self) or 0
236 # Search all entries for new episodes
237 for entry in entries:
238 try:
239 episode = PodcastEpisode.from_feedparser_entry(entry, self, mimetype_prefs)
240 if episode is not None and not episode.title:
241 episode.title, ext = os.path.splitext(os.path.basename(episode.url))
242 except Exception, e:
243 log('Cannot instantiate episode: %s. Skipping.', e, sender=self, traceback=True)
244 continue
246 if episode is None:
247 continue
249 # Detect (and update) existing episode based on GUIDs
250 existing_episode = existing_guids.get(episode.guid, None)
251 if existing_episode:
252 existing_episode.update_from(episode)
253 existing_episode.save()
254 continue
256 # Detect (and update) existing episode based on duplicate ID
257 existing_episode = existing_dupes.get(episode.duplicate_id(), None)
258 if existing_episode:
259 if existing_episode.is_duplicate(episode):
260 existing_episode.update_from(episode)
261 existing_episode.save()
262 continue
264 # Workaround for bug 340: If the episode has been
265 # published earlier than one week before the most
266 # recent existing episode, do not mark it as new.
267 if episode.pubDate < last_pubdate - self.SECONDS_PER_WEEK:
268 log('Episode with old date: %s', episode.title, sender=self)
269 episode.is_played = True
271 episode.save()
273 # Remove "unreachable" episodes - episodes that have not been
274 # downloaded and that the feed does not list as downloadable anymore
275 if self.id is not None:
276 seen_guids = set(e.guid for e in feed.entries if hasattr(e, 'guid'))
277 episodes_to_purge = (e for e in existing if \
278 e.state != gpodder.STATE_DOWNLOADED and \
279 e.guid not in seen_guids and e.guid is not None)
280 for episode in episodes_to_purge:
281 log('Episode removed from feed: %s (%s)', episode.title, \
282 episode.guid, sender=self)
283 self.db.delete_episode_by_guid(episode.guid, self.id)
285 # This *might* cause episodes to be skipped if there were more than
286 # max_episodes_per_feed items added to the feed between updates.
287 # The benefit is that it prevents old episodes from apearing as new
288 # in certain situations (see bug #340).
289 self.db.purge(max_episodes, self.id)
    def update_channel_lock(self):
        # Persist this channel's "locked" flag to the database
        self.db.update_channel_lock(self)

    def _update_etag_modified(self, feed):
        # Remember HTTP cache validators and refresh the release estimate
        self.updated_timestamp = time.time()
        self.calculate_publish_behaviour()
        self.etag = feed.headers.get('etag', self.etag)
        self.last_modified = feed.headers.get('last-modified', self.last_modified)

    def query_automatic_update(self):
        """Query if this channel should be updated automatically

        Returns True if the update should happen in automatic
        mode or False if this channel should be skipped (timeout
        not yet reached or release not expected right now).
        """
        updated = self.updated_timestamp
        expected = self.release_expected

        now = time.time()
        one_day_ago = now - 60*60*24
        lastcheck = now - 60*10

        # Update if the last check was more than a day ago, or if a new
        # release is expected and we haven't checked in the last 10 minutes
        return updated < one_day_ago or \
                (expected < now and updated < lastcheck)
    def update(self, max_episodes=0, mimetype_prefs=''):
        """Fetch this channel's feed and merge the result.

        The feedcore Fetcher reports outcomes by raising exceptions;
        each "expected" outcome is handled in its own except branch.
        Unexpected errors are re-raised to the caller.
        """
        try:
            self.feed_fetcher.fetch_channel(self)
        except CustomFeed, updated:
            # A registered custom handler provided the feed data
            custom_feed = updated.data
            self._consume_custom_feed(custom_feed, max_episodes)
            self.save()
        except feedcore.UpdatedFeed, updated:
            feed = updated.data
            self._consume_updated_feed(feed, max_episodes, mimetype_prefs)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NewLocation, updated:
            # The feed moved permanently - remember the new URL
            feed = updated.data
            self.url = feed.href
            self._consume_updated_feed(feed, max_episodes, mimetype_prefs)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NotModified, updated:
            # Feed unchanged - only refresh the HTTP cache validators
            feed = updated.data
            self._update_etag_modified(feed)
            self.save()
        except Exception, e:
            # "Not really" errors
            #feedcore.AuthenticationRequired
            # Temporary errors
            #feedcore.Offline
            #feedcore.BadRequest
            #feedcore.InternalServerError
            #feedcore.WifiLogin
            # Permanent errors
            #feedcore.Unsubscribe
            #feedcore.NotFound
            #feedcore.InvalidFeed
            #feedcore.UnknownStatusCode
            raise

        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_podcast_updated(self)

        self.db.commit()
    def delete(self):
        # Remove this channel (and its episodes) from the database
        self.db.delete_channel(self)

    def save(self):
        """Persist this channel to the database (notifying user hooks)."""
        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_podcast_save(self)
        if self.foldername is None:
            # get_save_dir() finds a unique value for foldername
            self.get_save_dir()
        self.db.save_channel(self)

    def get_statistics(self):
        # Episode counts for this channel as a 5-tuple
        # NOTE(review): exact tuple layout is defined by db.get_channel_count
        # - confirm against the database module
        if self.id is None:
            return (0, 0, 0, 0, 0)
        else:
            return self.db.get_channel_count(int(self.id))

    def authenticate_url(self, url):
        # Embed this channel's HTTP auth credentials (if any) into the URL
        return util.url_add_authentication(url, self.username, self.password)
    def __init__(self, db, download_dir):
        """Create an empty, unsaved channel bound to "db"."""
        self.db = db
        self.download_dir = download_dir
        self.id = None
        self.url = None
        self.title = ''
        self.link = ''
        self.description = ''
        self.image = None
        self.pubDate = 0
        self.parse_error = None
        self.foldername = None
        self.auto_foldername = 1 # automatically generated foldername

        # should this channel be synced to devices? (ex: iPod)
        self.sync_to_devices = True
        # to which playlist should be synced
        self.device_playlist_name = 'gPodder'
        # if set, this overrides the channel-provided title
        self.override_title = ''
        self.username = ''
        self.password = ''

        # HTTP cache validators from the previous fetch
        self.last_modified = None
        self.etag = None

        self.save_dir_size = 0
        self.__save_dir_size_set = False

        self.channel_is_locked = False

        # Release-prediction state (see calculate_publish_behaviour)
        self.release_expected = time.time()
        self.release_deviation = 0
        self.updated_timestamp = 0
        self.feed_update_enabled = True
    def calculate_publish_behaviour(self):
        """Estimate when the next episode release is expected.

        Looks at the pubDates of up to 30 recent episodes and derives
        release_expected and release_deviation from the gaps between
        consecutive releases. Needs at least 3 episodes to estimate.
        """
        episodes = self.db.load_episodes(self, factory=self.episode_factory, limit=30)
        if len(episodes) < 3:
            return

        deltas = []
        latest = max(e.pubDate for e in episodes)
        for index in range(len(episodes)-1):
            # Only consider pairs where both episodes have a known pubDate
            if episodes[index].pubDate != 0 and episodes[index+1].pubDate != 0:
                deltas.append(episodes[index].pubDate - episodes[index+1].pubDate)

        if len(deltas) > 1:
            stats = corestats.Stats(deltas)
            self.release_expected = min([latest+stats.stdev(), latest+(stats.min()+stats.avg())*.5])
            self.release_deviation = stats.stdev()
        else:
            self.release_expected = latest
            self.release_deviation = 0
    def request_save_dir_size(self):
        # Compute the download folder size lazily (only once per session)
        if not self.__save_dir_size_set:
            self.update_save_dir_size()
            self.__save_dir_size_set = True

    def update_save_dir_size(self):
        self.save_dir_size = util.calculate_size(self.save_dir)

    def get_title( self):
        # Precedence: user override > feed-provided title > URL fallback
        if self.override_title:
            return self.override_title
        elif not self.__title.strip():
            return self.url
        else:
            return self.__title

    def set_title( self, value):
        self.__title = value.strip()

    title = property(fget=get_title,
                     fset=set_title)
    def set_custom_title( self, custom_title):
        """Set a user-provided title override.

        Renames/merges the download folder to match the new title if a
        sane, unique folder name can be derived from it. Setting the
        custom title back to the feed title clears the override.
        """
        custom_title = custom_title.strip()

        # if the custom title is the same as we have
        if custom_title == self.override_title:
            return

        # if custom title is the same as channel title and we didn't have a custom title
        if custom_title == self.__title and self.override_title == '':
            return

        # make sure self.foldername is initialized
        self.get_save_dir()

        # rename folder if custom_title looks sane
        new_folder_name = self.find_unique_folder_name(custom_title)
        if len(new_folder_name) > 0 and new_folder_name != self.foldername:
            log('Changing foldername based on custom title: %s', custom_title, sender=self)
            new_folder = os.path.join(self.download_dir, new_folder_name)
            old_folder = os.path.join(self.download_dir, self.foldername)
            if os.path.exists(old_folder):
                if not os.path.exists(new_folder):
                    # Old folder exists, new folder does not -> simply rename
                    log('Renaming %s => %s', old_folder, new_folder, sender=self)
                    os.rename(old_folder, new_folder)
                else:
                    # Both folders exist -> move files and delete old folder
                    log('Moving files from %s to %s', old_folder, new_folder, sender=self)
                    for file in glob.glob(os.path.join(old_folder, '*')):
                        shutil.move(file, new_folder)
                    log('Removing %s', old_folder, sender=self)
                    shutil.rmtree(old_folder, ignore_errors=True)
            self.foldername = new_folder_name
            self.save()

        if custom_title != self.__title:
            self.override_title = custom_title
        else:
            self.override_title = ''
    def get_downloaded_episodes(self):
        # All episodes of this channel in the DOWNLOADED state
        return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)

    def get_new_episodes(self, downloading=lambda e: False):
        """
        Get a list of new episodes. You can optionally specify
        "downloading" as a callback that takes an episode as
        a parameter and returns True if the episode is currently
        being downloaded or False if not.

        By default, "downloading" is implemented so that it
        reports all episodes as not downloading.
        """
        return [episode for episode in self.db.load_episodes(self, \
                factory=self.episode_factory, state=gpodder.STATE_NORMAL) if \
                episode.check_is_new(downloading=downloading)]

    def get_playlist_filename(self):
        # If the save_dir doesn't end with a slash (which it really should
        # not, if the implementation is correct, we can just append .m3u :)
        assert self.save_dir[-1] != '/'
        return self.save_dir+'.m3u'
    def update_m3u_playlist(self):
        """(Re)write this channel's M3U playlist of downloaded episodes.

        The playlist file is removed when no episodes are downloaded.
        """
        m3u_filename = self.get_playlist_filename()

        downloaded_episodes = self.get_downloaded_episodes()
        if not downloaded_episodes:
            log('No episodes - removing %s', m3u_filename, sender=self)
            util.delete_file(m3u_filename)
            return

        log('Writing playlist to %s', m3u_filename, sender=self)
        util.write_m3u_playlist(m3u_filename, \
                PodcastEpisode.sort_by_pubdate(downloaded_episodes))

    def get_episode_by_url(self, url):
        # Single-episode lookup by enclosure URL
        return self.db.load_single_episode(self, \
                factory=self.episode_factory, url=url)

    def get_episode_by_filename(self, filename):
        # Single-episode lookup by local filename
        return self.db.load_single_episode(self, \
                factory=self.episode_factory, filename=filename)

    def get_all_episodes(self):
        return self.db.load_episodes(self, factory=self.episode_factory)
    def find_unique_folder_name(self, foldername):
        """Return a sanitized folder name not used by another channel.

        Appends " (2)", " (3)", ... until the name is unique in the DB.
        """
        # Remove trailing dots to avoid errors on Windows (bug 600)
        foldername = foldername.strip().rstrip('.')

        current_try = util.sanitize_filename(foldername, \
                self.MAX_FOLDERNAME_LENGTH)
        next_try_id = 2

        while True:
            if self.db.channel_foldername_exists(current_try):
                current_try = '%s (%d)' % (foldername, next_try_id)
                next_try_id += 1
            else:
                return current_try
    def get_save_dir(self):
        """Return (and create, if needed) this channel's download folder.

        Also derives a human-readable folder name from the channel title
        when none has been set manually, and migrates/renames old folders
        (including pre-0.15.0 md5-based names) when the name changes.
        """
        urldigest = hashlib.md5(self.url).hexdigest()
        sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
        if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
            # we must change the folder name, because it has not been set manually
            fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)

            # if this is an empty string, try the basename
            if len(fn_template) == 0:
                log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)

            # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
            if len(fn_template) == 0:
                log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = urldigest # no need for sanitize_filename here

            # Find a unique folder name for this podcast
            wanted_foldername = self.find_unique_folder_name(fn_template)

            # if the foldername has not been set, check if the (old) md5 filename exists
            if self.foldername is None and os.path.exists(os.path.join(self.download_dir, urldigest)):
                log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
                self.foldername = urldigest

            # we have a valid, new folder name in "current_try" -> use that!
            if self.foldername is not None and wanted_foldername != self.foldername:
                # there might be an old download folder crawling around - move it!
                new_folder_name = os.path.join(self.download_dir, wanted_foldername)
                old_folder_name = os.path.join(self.download_dir, self.foldername)
                if os.path.exists(old_folder_name):
                    if not os.path.exists(new_folder_name):
                        # Old folder exists, new folder does not -> simply rename
                        log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
                        os.rename(old_folder_name, new_folder_name)
                    else:
                        # Both folders exist -> move files and delete old folder
                        log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
                        for file in glob.glob(os.path.join(old_folder_name, '*')):
                            shutil.move(file, new_folder_name)
                        log('Removing %s', old_folder_name, sender=self)
                        shutil.rmtree(old_folder_name, ignore_errors=True)
                log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
                self.foldername = wanted_foldername
                self.save()

        save_dir = os.path.join(self.download_dir, self.foldername)

        # Create save_dir if it does not yet exist
        if not util.make_directory( save_dir):
            log( 'Could not create save_dir: %s', save_dir, sender = self)

        return save_dir

    save_dir = property(fget=get_save_dir)
    def remove_downloaded(self):
        """Delete the channel's playlist file and download directory."""
        # Remove the playlist file if it exists
        m3u_filename = self.get_playlist_filename()
        if os.path.exists(m3u_filename):
            util.delete_file(m3u_filename)

        # Remove the download directory
        shutil.rmtree(self.save_dir, True)

    @property
    def cover_file(self):
        # Path of the channel's cover image; migrates old "cover"/".cover"
        # files to the new "folder.jpg" name on first access
        new_name = os.path.join(self.save_dir, 'folder.jpg')
        if not os.path.exists(new_name):
            old_names = ('cover', '.cover')
            for old_name in old_names:
                filename = os.path.join(self.save_dir, old_name)
                if os.path.exists(filename):
                    shutil.move(filename, new_name)
                    return new_name

        return new_name

    def delete_episode(self, episode):
        # Delete the episode's downloaded file (if any) and mark it deleted
        filename = episode.local_filename(create=False, check_only=True)
        if filename is not None:
            util.delete_file(filename)

        episode.set_state(gpodder.STATE_DELETED)
class PodcastEpisode(PodcastModelObject):
    """holds data for one object in a channel"""
    # Maximum length of generated on-disk filenames
    MAX_FILENAME_LENGTH = 200

    def _get_played(self):
        return self.is_played

    def _set_played(self, played):
        self.is_played = played

    # Alias "is_played" to "played" for DB column mapping
    played = property(fget=_get_played, fset=_set_played)

    def _get_locked(self):
        return self.is_locked

    def _set_locked(self, locked):
        self.is_locked = locked

    # Alias "is_locked" to "locked" for DB column mapping
    locked = property(fget=_get_locked, fset=_set_locked)

    def _get_channel_id(self):
        return self.channel.id

    def _set_channel_id(self, channel_id):
        # The channel is fixed at construction time; the DB value must agree
        assert self.channel.id == channel_id

    # Accessor for the "channel_id" DB column
    channel_id = property(fget=_get_channel_id, fset=_set_channel_id)
    @staticmethod
    def sort_by_pubdate(episodes, reverse=False):
        """Sort a list of PodcastEpisode objects chronologically

        Returns a iterable, sorted sequence of the episodes
        """
        key_pubdate = lambda e: e.pubDate
        return sorted(episodes, key=key_pubdate, reverse=reverse)

    def reload_from_db(self):
        """
        Re-reads all episode details for this object from the
        database and updates this object accordingly. Can be
        used to refresh existing objects when the database has
        been updated (e.g. the filename has been set after a
        download where it was not set before the download)
        """
        d = self.db.load_episode(self.id)
        self.update_from_dict(d or {})
        return self

    def has_website_link(self):
        # True if the episode's link is worth opening in a browser,
        # i.e. it differs from the download URL or is a YouTube link
        return bool(self.link) and (self.link != self.url or \
                youtube.is_video_link(self.link))
    @staticmethod
    def from_feedparser_entry(entry, channel, mimetype_prefs=''):
        """Create a new episode from a feedparser entry.

        Looks for a download URL in this order: enclosures (preferred by
        mime type), Media RSS content, plain links that resolve to media
        files, and finally MP3 links found in the entry's content.
        Returns None if no usable download URL is found.
        """
        episode = PodcastEpisode(channel)

        # Replace multi-space and newlines with single space (Maemo bug 11173)
        episode.title = re.sub('\s+', ' ', entry.get('title', ''))
        episode.link = entry.get('link', '')
        if 'content' in entry and len(entry['content']) and \
                entry['content'][0].type == 'text/html':
            # Prefer <content:encoded> HTML over the summary (bug 1144)
            episode.description = entry['content'][0].value
        else:
            episode.description = entry.get('summary', '')

        try:
            # Parse iTunes-specific podcast duration metadata
            total_time = util.parse_time(entry.get('itunes_duration', ''))
            episode.total_time = total_time
        except:
            pass

        # Fallback to subtitle if summary is not available
        if not episode.description:
            episode.description = entry.get('subtitle', '')

        episode.guid = entry.get('id', '')
        if entry.get('updated_parsed', None):
            # mktime_tz needs a 10-tuple; append a zero UTC offset
            episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

        enclosures = entry.get('enclosures', ())
        audio_available = any(e.get('type', '').startswith('audio/') \
                for e in enclosures)
        video_available = any(e.get('type', '').startswith('video/') \
                for e in enclosures)

        # Create the list of preferred mime types
        mimetype_prefs = mimetype_prefs.split(',')

        def calculate_preference_value(enclosure):
            """Calculate preference value of an enclosure

            This is based on mime types and allows users to prefer
            certain mime types over others (e.g. MP3 over AAC, ...)
            """
            mimetype = enclosure.get('type', None)
            try:
                # If the mime type is found, return its (zero-based) index
                return mimetype_prefs.index(mimetype)
            except ValueError:
                # If it is not found, assume it comes after all listed items
                return len(mimetype_prefs)

        # Enclosures
        for e in sorted(enclosures, key=calculate_preference_value):
            episode.mimetype = e.get('type', 'application/octet-stream')
            if episode.mimetype == '':
                # See Maemo bug 10036
                log('Fixing empty mimetype in ugly feed', sender=episode)
                episode.mimetype = 'application/octet-stream'

            if '/' not in episode.mimetype:
                continue

            # Skip images in feeds if audio or video is available (bug 979)
            if episode.mimetype.startswith('image/') and \
                    (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(e.length) or -1
            except:
                episode.length = -1

            return episode

        # Media RSS content
        for m in entry.get('media_content', ()):
            episode.mimetype = m.get('type', 'application/octet-stream')
            if '/' not in episode.mimetype:
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(m.fileSize) or -1
            except:
                episode.length = -1

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if youtube.is_video_link(episode.url):
                return episode

            # Check if we can resolve this link to a audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to a audio or video file - use it!
            if file_type is not None:
                return episode

        # Scan MP3 links in description text
        mp3s = re.compile(r'http://[^"]*\.mp3')
        for content in entry.get('content', ()):
            html = content.value
            for match in mp3s.finditer(html):
                episode.url = match.group(0)
                return episode

        # No downloadable media found in this entry
        return None
    def __init__(self, channel):
        """Create an empty, unsaved episode bound to "channel"."""
        self.db = channel.db
        # Used by Storage for faster saving
        self.id = None
        self.url = ''
        self.title = ''
        self.length = 0
        self.mimetype = 'application/octet-stream'
        self.guid = ''
        self.description = ''
        self.link = ''
        self.channel = channel
        self.pubDate = 0
        self.filename = None
        self.auto_filename = 1 # automatically generated filename

        self.state = gpodder.STATE_NORMAL
        self.is_played = False

        # Initialize the "is_locked" property
        self._is_locked = False
        self.is_locked = channel.channel_is_locked

        # Time attributes
        self.total_time = 0
        self.current_position = 0
        self.current_position_updated = 0

    def get_is_locked(self):
        return self._is_locked

    def set_is_locked(self, is_locked):
        # Normalize to a real bool for DB storage
        self._is_locked = bool(is_locked)

    is_locked = property(fget=get_is_locked, fset=set_is_locked)
    def save(self):
        """Persist this episode to the database (notifying user hooks)."""
        # If the file already exists on disk, reflect that in the state
        if self.state != gpodder.STATE_DOWNLOADED and self.file_exists():
            self.state = gpodder.STATE_DOWNLOADED
        if gpodder.user_hooks is not None:
            gpodder.user_hooks.on_episode_save(self)
        self.db.save_episode(self)

    def on_downloaded(self, filename):
        """Update episode state after a successful download."""
        self.state = gpodder.STATE_DOWNLOADED
        self.is_played = False
        self.length = os.path.getsize(filename)

        if not self.total_time:
            try:
                # Try to detect the track duration via GStreamer
                length = gstreamer.get_track_length(filename)
                if length is not None:
                    # GStreamer reports milliseconds; we store seconds
                    length = int(length/1000)
                    log('Detected media length: %d seconds', length, \
                            sender=self)
                    self.total_time = length
                    self.db.save_episode(self)
                    self.db.commit()
                    return
            except Exception, e:
                log('Error while detecting media length: %s', str(e), \
                        sender=self)

        self.db.save_downloaded_episode(self)
        self.db.commit()
    def set_state(self, state):
        # Change the state and persist it immediately
        self.state = state
        self.db.update_episode_state(self)

    def mark(self, state=None, is_played=None, is_locked=None):
        """Update any subset of (state, played, locked) and persist."""
        if state is not None:
            self.state = state
        if is_played is not None:
            self.is_played = is_played
        if is_locked is not None:
            self.is_locked = is_locked
        self.db.update_episode_state(self)

    @property
    def title_markup(self):
        # Pango markup: episode title with the channel title in small print
        return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
                xml.sax.saxutils.escape(self.channel.title))
    @property
    def maemo_markup(self):
        # Pango markup for the Maemo episode list
        if self.length > 0:
            length_str = '%s; ' % self.filesize_prop
        else:
            length_str = ''
        return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(re.sub('\s+', ' ', self.title)), \
                xml.sax.saxutils.escape(length_str), \
                xml.sax.saxutils.escape(self.pubdate_prop), \
                xml.sax.saxutils.escape(re.sub('\s+', ' ', self.channel.title)))

    @property
    def maemo_remove_markup(self):
        # Pango markup for the Maemo "remove episodes" dialog
        if self.is_played:
            played_string = _('played')
        else:
            played_string = _('unplayed')
        downloaded_string = self.get_age_string()
        if not downloaded_string:
            downloaded_string = _('today')
        return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(self.title), \
                xml.sax.saxutils.escape(self.filesize_prop), \
                xml.sax.saxutils.escape(played_string), \
                xml.sax.saxutils.escape(downloaded_string), \
                xml.sax.saxutils.escape(self.channel.title))

    def age_in_days(self):
        # Age of the downloaded file in days (based on the file on disk)
        return util.file_age_in_days(self.local_filename(create=False, \
                check_only=True))

    age_int_prop = property(fget=age_in_days)

    def get_age_string(self):
        return util.file_age_to_string(self.age_in_days())

    age_prop = property(fget=get_age_string)
    def one_line_description( self):
        # Single-line, HTML-stripped description for list views
        lines = util.remove_html_tags(self.description or '').strip().splitlines()
        if not lines or lines[0] == '':
            return _('No description available')
        else:
            return ' '.join(lines)

    def delete_from_disk(self):
        # Best-effort removal of the downloaded file; failures are logged
        try:
            self.channel.delete_episode(self)
        except:
            log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)

    def find_unique_file_name(self, url, filename, extension):
        """Return a filename not yet used by another episode in the DB.

        Appends " (2)", " (3)", ... until the name is unique.
        """
        current_try = util.sanitize_filename(filename, self.MAX_FILENAME_LENGTH)+extension
        next_try_id = 2
        lookup_url = None

        if self.filename == current_try and current_try is not None:
            # We already have this filename - good!
            return current_try

        while self.db.episode_filename_exists(current_try):
            current_try = '%s (%d)%s' % (filename, next_try_id, extension)
            next_try_id += 1

        return current_try
    def local_filename(self, create, force_update=False, check_only=False,
            template=None):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if would be a good idea. Use this if you
        only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.
        """
        # NOTE(review): ext is encoded to a byte string here because this
        # Python 2 codebase builds filesystem paths from byte strings
        ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

        # For compatibility with already-downloaded episodes, we
        # have to know md5 filenames if they are downloaded already
        urldigest = hashlib.md5(self.url).hexdigest()

        if not create and self.filename is None:
            urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
            if os.path.exists(urldigest_filename):
                # The file exists, so set it up in our database
                log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
                self.filename = urldigest+ext
                self.auto_filename = 1
                self.save()
                return urldigest_filename
            return None

        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if check_only:
            if self.filename is None:
                return None
            else:
                return os.path.join(self.channel.save_dir, self.filename)

        # Generate a (new) filename when none exists yet, when the caller
        # forces an update, or when we still carry an auto-generated
        # pre-0.15.0 md5-based name that can now be improved
        if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, extension_UNUSED = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                log('Looks like a redirection to me: %s', self.url, sender=self)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                log('Redirection resolved to: %s', url, sender=self)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use the video title for YouTube downloads
            for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
                if self.url.startswith(yt_url):
                    fn_template = util.sanitize_filename(os.path.basename(self.title), self.MAX_FILENAME_LENGTH)

            # If the basename is empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0 or fn_template.startswith('redirect.'):
                log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
                fn_template = urldigest

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

            # We populate the filename field the first time - does the old file still exist?
            if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
                log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
                self.filename = urldigest+ext

            # The old file exists, but we have decided to want a different filename
            if self.filename is not None and wanted_filename != self.filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    log('Choosing new filename: %s', new_file_name, sender=self)
                else:
                    log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
                log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
            elif self.filename is None:
                log('Setting filename to "%s".', wanted_filename, sender=self)
            else:
                log('Should update filename. Stays the same (%s). Good!', \
                        wanted_filename, sender=self)
            # Persist the chosen filename for future lookups
            self.filename = wanted_filename
            self.save()
            self.db.commit()

        return os.path.join(self.channel.save_dir, self.filename)
1094 def set_mimetype(self, mimetype, commit=False):
1095 """Sets the mimetype for this episode"""
1096 self.mimetype = mimetype
1097 if commit:
1098 self.db.commit()
1100 def extension(self, may_call_local_filename=True):
1101 filename, ext = util.filename_from_url(self.url)
1102 if may_call_local_filename:
1103 filename = self.local_filename(create=False)
1104 if filename is not None:
1105 filename, ext = os.path.splitext(filename)
1106 # if we can't detect the extension from the url fallback on the mimetype
1107 if ext == '' or util.file_type_by_extension(ext) is None:
1108 ext = util.extension_from_mimetype(self.mimetype)
1109 return ext
1111 def check_is_new(self, downloading=lambda e: False):
1113 Returns True if this episode is to be considered new.
1114 "Downloading" should be a callback that gets an episode
1115 as its parameter and returns True if the episode is
1116 being downloaded at the moment.
1118 return self.state == gpodder.STATE_NORMAL and \
1119 not self.is_played and \
1120 not downloading(self)
1122 def mark_new(self):
1123 self.state = gpodder.STATE_NORMAL
1124 self.is_played = False
1125 self.db.update_episode_state(self)
1127 def mark_old(self):
1128 self.is_played = True
1129 self.db.update_episode_state(self)
1131 def file_exists(self):
1132 filename = self.local_filename(create=False, check_only=True)
1133 if filename is None:
1134 return False
1135 else:
1136 return os.path.exists(filename)
1138 def was_downloaded(self, and_exists=False):
1139 if self.state != gpodder.STATE_DOWNLOADED:
1140 return False
1141 if and_exists and not self.file_exists():
1142 return False
1143 return True
1145 def sync_filename(self, use_custom=False, custom_format=None):
1146 if use_custom:
1147 return util.object_string_formatter(custom_format,
1148 episode=self, podcast=self.channel)
1149 else:
1150 return self.title
1152 def file_type(self):
1153 # Assume all YouTube links are video files
1154 if youtube.is_video_link(self.url):
1155 return 'video'
1157 return util.file_type_by_extension(self.extension())
1159 @property
1160 def basename( self):
1161 return os.path.splitext( os.path.basename( self.url))[0]
1163 @property
1164 def published( self):
1166 Returns published date as YYYYMMDD (or 00000000 if not available)
1168 try:
1169 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
1170 except:
1171 log( 'Cannot format pubDate for "%s".', self.title, sender = self)
1172 return '00000000'
1174 @property
1175 def pubtime(self):
1177 Returns published time as HHMM (or 0000 if not available)
1179 try:
1180 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
1181 except:
1182 log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
1183 return '0000'
1185 def playlist_title(self):
1186 """Return a title for this episode in a playlist
1188 The title will be composed of the podcast name, the
1189 episode name and the publication date. The return
1190 value is the canonical representation of this episode
1191 in playlists (for example, M3U playlists).
1193 return '%s - %s (%s)' % (self.channel.title, \
1194 self.title, \
1195 self.cute_pubdate())
1197 def cute_pubdate(self):
1198 result = util.format_date(self.pubDate)
1199 if result is None:
1200 return '(%s)' % _('unknown')
1201 else:
1202 return result
1204 pubdate_prop = property(fget=cute_pubdate)
1206 def calculate_filesize( self):
1207 filename = self.local_filename(create=False)
1208 if filename is None:
1209 log('calculate_filesized called, but filename is None!', sender=self)
1210 try:
1211 self.length = os.path.getsize(filename)
1212 except:
1213 log( 'Could not get filesize for %s.', self.url)
1215 def get_play_info_string(self):
1216 if self.current_position > 0 and \
1217 self.total_time <= self.current_position:
1218 return '%s (%s)' % (_('Finished'), self.get_duration_string(),)
1219 if self.current_position > 0:
1220 return '%s / %s' % (self.get_position_string(), \
1221 self.get_duration_string())
1222 else:
1223 return self.get_duration_string()
1225 def get_position_string(self):
1226 return util.format_time(self.current_position)
1228 def get_duration_string(self):
1229 return util.format_time(self.total_time)
1231 def get_filesize_string(self):
1232 return util.format_filesize(self.length)
1234 filesize_prop = property(fget=get_filesize_string)
1236 def get_played_string( self):
1237 if not self.is_played:
1238 return _('Unplayed')
1240 return ''
1242 played_prop = property(fget=get_played_string)
1244 def is_duplicate(self, episode):
1245 if self.title == episode.title and self.pubDate == episode.pubDate:
1246 log('Possible duplicate detected: %s', self.title)
1247 return True
1248 return False
1250 def duplicate_id(self):
1251 return hash((self.title, self.pubDate))
1253 def update_from(self, episode):
1254 for k in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
1255 setattr(self, k, getattr(episode, k))