Fix a regression for custom feeds (e.g. Soundcloud)
[gpodder.git] / src / gpodder / model.py
# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

# gpodder.model - Core model classes for gPodder (2009-08-13)
# Based on libpodcasts.py (thp, 2005-10-29)

import gpodder
from gpodder import util
from gpodder import feedcore
from gpodder import youtube
from gpodder import corestats

from gpodder.liblogger import log

import os
import re
import glob
import shutil
import urllib
import urlparse
import time
import datetime
import rfc822
import hashlib
import feedparser
import xml.sax.saxutils

_ = gpodder.gettext


class CustomFeed(feedcore.ExceptionWithData): pass

class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder and provides a
    convenience method (fetch_channel) for use by PodcastChannel objects.
    """
    custom_handlers = []

    def __init__(self):
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        etag = channel.etag
        modified = feedparser._parse_date(channel.last_modified)
        # If we have a username or password, rebuild the URL with them included
        # Note: using a HTTPBasicAuthHandler would be a pain because we need
        # to know the realm. It can be done, but I think this method works, too.
        url = channel.authenticate_url(channel.url)
        for handler in self.custom_handlers:
            custom_feed = handler.handle_url(url)
            if custom_feed is not None:
                raise CustomFeed(custom_feed)
        self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        return youtube.get_real_channel_url(url)

    @classmethod
    def register(cls, handler):
        cls.custom_handlers.append(handler)

#    def _get_handlers(self):
#        # Add a ProxyHandler for fetching data via a proxy server
#        proxies = {'http': 'http://proxy.example.org:8080'}
#        return [urllib2.ProxyHandler(proxies)]

# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register
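
# Example (sketch, not part of the original API beyond register_custom_handler):
# a custom handler only needs a handle_url() callable that returns a feed-like
# object for URLs it understands and None otherwise. Class and module names
# below are hypothetical:
#
#     from gpodder import model
#
#     class MyFeedHandler(object):
#         @classmethod
#         def handle_url(cls, url):
#             if url.startswith('http://example.org/'):
#                 return MyCustomFeed(url)   # hypothetical feed object
#             return None
#
#     model.register_custom_handler(MyFeedHandler)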

class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        o = cls(*args)
        o.update_from_dict(d)
        return o

    def update_from_dict(self, d):
        """
        Updates the attributes of this object with values from the
        dictionary "d" by using the keys found in "d".
        """
        for k in d:
            if hasattr(self, k):
                setattr(self, k, d[k])
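
    # Example (sketch): rebuild a model object from a database row converted
    # to a dict ("row" is a placeholder name):
    #
    #     episode = PodcastEpisode.create_from_dict(row, channel)
    #
    # Keys in "row" that do not match an existing attribute are ignored.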

class PodcastChannel(PodcastModelObject):
    """holds data for a complete channel"""
    MAX_FOLDERNAME_LENGTH = 150
    SECONDS_PER_WEEK = 7*24*60*60

    feed_fetcher = gPodderFetcher()

    @classmethod
    def build_factory(cls, download_dir):
        def factory(dict, db):
            return cls.create_from_dict(dict, db, download_dir)
        return factory

    @classmethod
    def load_from_db(cls, db, download_dir):
        return db.load_channels(factory=cls.build_factory(download_dir))

    @classmethod
    def load(cls, db, url, create=True, authentication_tokens=None,\
            max_episodes=0, download_dir=None, allow_empty_feeds=False):
        if isinstance(url, unicode):
            url = url.encode('utf-8')

        tmp = db.load_channels(factory=cls.build_factory(download_dir), url=url)
        if len(tmp):
            return tmp[0]
        elif create:
            tmp = PodcastChannel(db, download_dir)
            tmp.url = url
            if authentication_tokens is not None:
                tmp.username = authentication_tokens[0]
                tmp.password = authentication_tokens[1]

            tmp.update(max_episodes)
            tmp.save()
            db.force_last_new(tmp)
            # Subscribing to empty feeds should yield an error (except if
            # the user specifically allows empty feeds in the config UI)
            if sum(tmp.get_statistics()) == 0 and not allow_empty_feeds:
                tmp.delete()
                raise Exception(_('No downloadable episodes in feed'))
            return tmp
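
    # Example (sketch): subscribe to a feed or load an existing subscription.
    # "db" is the gPodder database object and "downloads" a directory path;
    # both names are placeholders:
    #
    #     channel = PodcastChannel.load(db, 'http://example.org/feed.xml',
    #             create=True, max_episodes=0, download_dir=downloads)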

    def episode_factory(self, d, db__parameter_is_unused=None):
        """
        This function takes a dictionary containing key-value pairs for
        episodes and returns a new PodcastEpisode object that is connected
        to this PodcastChannel object.

        Returns: A new PodcastEpisode object
        """
        return PodcastEpisode.create_from_dict(d, self)

    def _consume_custom_feed(self, custom_feed, max_episodes=0):
        self.title = custom_feed.get_title()
        self.link = custom_feed.get_link()
        self.description = custom_feed.get_description()
        self.image = custom_feed.get_image()
        self.pubDate = time.time()
        self.save()

        guids = [episode.guid for episode in self.get_all_episodes()]

        # Insert newly-found episodes into the database
        custom_feed.get_new_episodes(self, guids)

        self.save()

        self.db.purge(max_episodes, self.id)
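
    # Note (summary of the interface consumed above, not a definitive spec):
    # a custom feed object returned by a registered handler is expected to
    # provide get_title(), get_link(), get_description(), get_image() and
    # get_new_episodes(channel, existing_guids), where the last one creates
    # and saves episodes for entries whose GUIDs are not in existing_guids.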

    def _consume_updated_feed(self, feed, max_episodes=0):
        self.parse_error = feed.get('bozo_exception', None)

        self.title = feed.feed.get('title', self.url)
        self.link = feed.feed.get('link', self.link)
        self.description = feed.feed.get('subtitle', self.description)
        # Start YouTube-specific title FIX
        YOUTUBE_PREFIX = 'Uploads by '
        if self.title.startswith(YOUTUBE_PREFIX):
            self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
        # End YouTube-specific title FIX

        try:
            self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None)+(0,))
        except:
            self.pubDate = time.time()

        if hasattr(feed.feed, 'image'):
            for attribute in ('href', 'url'):
                new_value = getattr(feed.feed.image, attribute, None)
                if new_value is not None:
                    log('Found cover art in %s: %s', attribute, new_value)
                    self.image = new_value

        if hasattr(feed.feed, 'icon'):
            self.image = feed.feed.icon

        self.save()

        # Load all episodes to update them properly.
        existing = self.get_all_episodes()

        # We can limit the maximum number of entries that gPodder will parse
        if max_episodes > 0 and len(feed.entries) > max_episodes:
            entries = feed.entries[:max_episodes]
        else:
            entries = feed.entries

        # Title + PubDate hashes for existing episodes
        existing_dupes = dict((e.duplicate_id(), e) for e in existing)

        # GUID-based existing episode list
        existing_guids = dict((e.guid, e) for e in existing)

        # Get most recent pubDate of all episodes
        last_pubdate = self.db.get_last_pubdate(self) or 0

        # Search all entries for new episodes
        for entry in entries:
            try:
                episode = PodcastEpisode.from_feedparser_entry(entry, self)
                if episode is not None and not episode.title:
                    episode.title, ext = os.path.splitext(os.path.basename(episode.url))
            except Exception, e:
                log('Cannot instantiate episode: %s. Skipping.', e, sender=self, traceback=True)
                continue

            if episode is None:
                continue

            # Detect (and update) existing episode based on GUIDs
            existing_episode = existing_guids.get(episode.guid, None)
            if existing_episode:
                existing_episode.update_from(episode)
                existing_episode.save()
                continue

            # Detect (and update) existing episode based on duplicate ID
            existing_episode = existing_dupes.get(episode.duplicate_id(), None)
            if existing_episode:
                if existing_episode.is_duplicate(episode):
                    existing_episode.update_from(episode)
                    existing_episode.save()
                    continue

            # Workaround for bug 340: If the episode has been
            # published earlier than one week before the most
            # recent existing episode, do not mark it as new.
            if episode.pubDate < last_pubdate - self.SECONDS_PER_WEEK:
                log('Episode with old date: %s', episode.title, sender=self)
                episode.is_played = True

            episode.save()
271 # Remove "unreachable" episodes - episodes that have not been
272 # downloaded and that the feed does not list as downloadable anymore
273 if self.id is not None:
274 seen_guids = set(e.guid for e in feed.entries if hasattr(e, 'guid'))
275 episodes_to_purge = (e for e in existing if \
276 e.state != gpodder.STATE_DOWNLOADED and \
277 e.guid not in seen_guids and e.guid is not None)
278 for episode in episodes_to_purge:
279 log('Episode removed from feed: %s (%s)', episode.title, \
280 episode.guid, sender=self)
281 self.db.delete_episode_by_guid(episode.guid, self.id)
283 # This *might* cause episodes to be skipped if there were more than
284 # max_episodes_per_feed items added to the feed between updates.
285 # The benefit is that it prevents old episodes from apearing as new
286 # in certain situations (see bug #340).
287 self.db.purge(max_episodes, self.id)

    def update_channel_lock(self):
        self.db.update_channel_lock(self)

    def _update_etag_modified(self, feed):
        self.updated_timestamp = time.time()
        self.calculate_publish_behaviour()
        self.etag = feed.headers.get('etag', self.etag)
        self.last_modified = feed.headers.get('last-modified', self.last_modified)

    def query_automatic_update(self):
        """Query if this channel should be updated automatically

        Returns True if the update should happen in automatic
        mode or False if this channel should be skipped (timeout
        not yet reached or release not expected right now).
        """
        updated = self.updated_timestamp
        expected = self.release_expected

        now = time.time()
        one_day_ago = now - 60*60*24
        lastcheck = now - 60*10

        return updated < one_day_ago or \
                (expected < now and updated < lastcheck)

    def update(self, max_episodes=0):
        try:
            self.feed_fetcher.fetch_channel(self)
        except CustomFeed, updated:
            custom_feed = updated.data
            self._consume_custom_feed(custom_feed, max_episodes)
            self.save()
        except feedcore.UpdatedFeed, updated:
            feed = updated.data
            self._consume_updated_feed(feed, max_episodes)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NewLocation, updated:
            feed = updated.data
            self.url = feed.href
            self._consume_updated_feed(feed, max_episodes)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NotModified, updated:
            feed = updated.data
            self._update_etag_modified(feed)
            self.save()
        except Exception, e:
            # "Not really" errors
            #feedcore.AuthenticationRequired
            # Temporary errors
            #feedcore.Offline
            #feedcore.BadRequest
            #feedcore.InternalServerError
            #feedcore.WifiLogin
            # Permanent errors
            #feedcore.Unsubscribe
            #feedcore.NotFound
            #feedcore.InvalidFeed
            #feedcore.UnknownStatusCode
            raise

        self.db.commit()

    def delete(self):
        self.db.delete_channel(self)

    def save(self):
        self.db.save_channel(self)

    def get_statistics(self):
        if self.id is None:
            return (0, 0, 0, 0, 0)
        else:
            return self.db.get_channel_count(int(self.id))

    def authenticate_url(self, url):
        return util.url_add_authentication(url, self.username, self.password)

    def __init__(self, db, download_dir):
        self.db = db
        self.download_dir = download_dir
        self.id = None
        self.url = None
        self.title = ''
        self.link = ''
        self.description = ''
        self.image = None
        self.pubDate = 0
        self.parse_error = None
        self.foldername = None
        self.auto_foldername = 1 # automatically generated foldername

        # should this channel be synced to devices? (e.g. iPod)
        self.sync_to_devices = True
        # to which playlist this channel should be synced
        self.device_playlist_name = 'gPodder'
        # if set, this overrides the channel-provided title
        self.override_title = ''
        self.username = ''
        self.password = ''

        self.last_modified = None
        self.etag = None

        self.save_dir_size = 0
        self.__save_dir_size_set = False

        self.channel_is_locked = False

        self.release_expected = time.time()
        self.release_deviation = 0
        self.updated_timestamp = 0

    def calculate_publish_behaviour(self):
        episodes = self.db.load_episodes(self, factory=self.episode_factory, limit=30)
        if len(episodes) < 3:
            return

        deltas = []
        latest = max(e.pubDate for e in episodes)
        for index in range(len(episodes)-1):
            if episodes[index].pubDate != 0 and episodes[index+1].pubDate != 0:
                deltas.append(episodes[index].pubDate - episodes[index+1].pubDate)

        if len(deltas) > 1:
            stats = corestats.Stats(deltas)
            self.release_expected = min([latest+stats.stdev(), latest+(stats.min()+stats.avg())*.5])
            self.release_deviation = stats.stdev()
        else:
            self.release_expected = latest
            self.release_deviation = 0
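
    # Worked example (sketch) for the estimate above: if the most recent
    # episodes are published exactly one week apart, every delta is 604800
    # seconds, so stdev() == 0 and min() == avg() == 604800. The estimate is
    # min(latest + 0, latest + 604800*0.5) == latest, i.e. release_expected
    # collapses to the newest pubDate and query_automatic_update() then only
    # applies its ten-minute "lastcheck" throttle.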

    def request_save_dir_size(self):
        if not self.__save_dir_size_set:
            self.update_save_dir_size()
            self.__save_dir_size_set = True

    def update_save_dir_size(self):
        self.save_dir_size = util.calculate_size(self.save_dir)

    def get_title(self):
        if self.override_title:
            return self.override_title
        elif not self.__title.strip():
            return self.url
        else:
            return self.__title

    def set_title(self, value):
        self.__title = value.strip()

    title = property(fget=get_title,
            fset=set_title)

    def set_custom_title(self, custom_title):
        custom_title = custom_title.strip()

        # if the custom title is the same as the one we already have
        if custom_title == self.override_title:
            return

        # if custom title is the same as channel title and we didn't have a custom title
        if custom_title == self.__title and self.override_title == '':
            return

        # make sure self.foldername is initialized
        self.get_save_dir()

        # rename folder if custom_title looks sane
        new_folder_name = self.find_unique_folder_name(custom_title)
        if len(new_folder_name) > 0 and new_folder_name != self.foldername:
            log('Changing foldername based on custom title: %s', custom_title, sender=self)
            new_folder = os.path.join(self.download_dir, new_folder_name)
            old_folder = os.path.join(self.download_dir, self.foldername)
            if os.path.exists(old_folder):
                if not os.path.exists(new_folder):
                    # Old folder exists, new folder does not -> simply rename
                    log('Renaming %s => %s', old_folder, new_folder, sender=self)
                    os.rename(old_folder, new_folder)
                else:
                    # Both folders exist -> move files and delete old folder
                    log('Moving files from %s to %s', old_folder, new_folder, sender=self)
                    for file in glob.glob(os.path.join(old_folder, '*')):
                        shutil.move(file, new_folder)
                    log('Removing %s', old_folder, sender=self)
                    shutil.rmtree(old_folder, ignore_errors=True)
            self.foldername = new_folder_name
            self.save()

        if custom_title != self.__title:
            self.override_title = custom_title
        else:
            self.override_title = ''

    def get_downloaded_episodes(self):
        return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)

    def get_new_episodes(self, downloading=lambda e: False):
        """
        Get a list of new episodes. You can optionally specify
        "downloading" as a callback that takes an episode as
        a parameter and returns True if the episode is currently
        being downloaded or False if not.

        By default, "downloading" is implemented so that it
        reports all episodes as not downloading.
        """
        return [episode for episode in self.db.load_episodes(self, \
                factory=self.episode_factory, state=gpodder.STATE_NORMAL) if \
                episode.check_is_new(downloading=downloading)]
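
    # Example (sketch): skip episodes whose URLs are in a caller-maintained
    # set of active downloads ("active_urls" is a placeholder):
    #
    #     new = channel.get_new_episodes(
    #             downloading=lambda episode: episode.url in active_urls)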

    def get_playlist_filename(self):
        # If the save_dir doesn't end with a slash (which it really should
        # not, if the implementation is correct), we can just append .m3u :)
        assert self.save_dir[-1] != '/'
        return self.save_dir+'.m3u'

    def update_m3u_playlist(self):
        m3u_filename = self.get_playlist_filename()

        downloaded_episodes = self.get_downloaded_episodes()
        if not downloaded_episodes:
            log('No episodes - removing %s', m3u_filename, sender=self)
            util.delete_file(m3u_filename)
            return

        log('Writing playlist to %s', m3u_filename, sender=self)
        f = open(m3u_filename, 'w')
        f.write('#EXTM3U\n')

        for episode in PodcastEpisode.sort_by_pubdate(downloaded_episodes):
            if episode.was_downloaded(and_exists=True):
                filename = episode.local_filename(create=False)
                assert filename is not None

                if os.path.dirname(filename).startswith(os.path.dirname(m3u_filename)):
                    filename = filename[len(os.path.dirname(m3u_filename)+os.sep):]
                f.write('#EXTINF:0,'+self.title+' - '+episode.title+' ('+episode.cute_pubdate()+')\n')
                f.write(filename+'\n')

        f.close()

    def get_all_episodes(self):
        return self.db.load_episodes(self, factory=self.episode_factory)

    def find_unique_folder_name(self, foldername):
        # Remove trailing dots to avoid errors on Windows (bug 600)
        foldername = foldername.strip().rstrip('.')

        current_try = util.sanitize_filename(foldername, \
                self.MAX_FOLDERNAME_LENGTH)
        next_try_id = 2

        while True:
            if self.db.channel_foldername_exists(current_try):
                current_try = '%s (%d)' % (foldername, next_try_id)
                next_try_id += 1
            else:
                return current_try
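
    # Example (sketch): if the folder name "Linux Outlaws" is already taken
    # in the database, the candidates tried next are "Linux Outlaws (2)",
    # "Linux Outlaws (3)", and so on, until an unused name is found.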

    def get_save_dir(self):
        urldigest = hashlib.md5(self.url).hexdigest()
        sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
        if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
            # we must change the folder name, because it has not been set manually
            fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)

            # if this is an empty string, try the basename
            if len(fn_template) == 0:
                log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)

            # If the basename is also empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0:
                log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = urldigest # no need for sanitize_filename here

            # Find a unique folder name for this podcast
            wanted_foldername = self.find_unique_folder_name(fn_template)

            # if the foldername has not been set, check if the (old) md5 filename exists
            if self.foldername is None and os.path.exists(os.path.join(self.download_dir, urldigest)):
                log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
                self.foldername = urldigest

            # we have a valid, new folder name in "wanted_foldername" -> use that!
            if self.foldername is not None and wanted_foldername != self.foldername:
                # there might be an old download folder crawling around - move it!
                new_folder_name = os.path.join(self.download_dir, wanted_foldername)
                old_folder_name = os.path.join(self.download_dir, self.foldername)
                if os.path.exists(old_folder_name):
                    if not os.path.exists(new_folder_name):
                        # Old folder exists, new folder does not -> simply rename
                        log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
                        os.rename(old_folder_name, new_folder_name)
                    else:
                        # Both folders exist -> move files and delete old folder
                        log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
                        for file in glob.glob(os.path.join(old_folder_name, '*')):
                            shutil.move(file, new_folder_name)
                        log('Removing %s', old_folder_name, sender=self)
                        shutil.rmtree(old_folder_name, ignore_errors=True)
            log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
            self.foldername = wanted_foldername
            self.save()

        save_dir = os.path.join(self.download_dir, self.foldername)

        # Create save_dir if it does not yet exist
        if not util.make_directory(save_dir):
            log('Could not create save_dir: %s', save_dir, sender=self)

        return save_dir

    save_dir = property(fget=get_save_dir)

    def remove_downloaded(self):
        shutil.rmtree(self.save_dir, True)

    @property
    def cover_file(self):
        new_name = os.path.join(self.save_dir, 'folder.jpg')
        if not os.path.exists(new_name):
            old_names = ('cover', '.cover')
            for old_name in old_names:
                filename = os.path.join(self.save_dir, old_name)
                if os.path.exists(filename):
                    shutil.move(filename, new_name)
                    return new_name

        return new_name

    def delete_episode(self, episode):
        filename = episode.local_filename(create=False, check_only=True)
        if filename is not None:
            util.delete_file(filename)

        episode.set_state(gpodder.STATE_DELETED)


class PodcastEpisode(PodcastModelObject):
    """holds data for one object in a channel"""
    MAX_FILENAME_LENGTH = 200

    def _get_played(self):
        return self.is_played

    def _set_played(self, played):
        self.is_played = played

    # Alias "is_played" to "played" for DB column mapping
    played = property(fget=_get_played, fset=_set_played)

    def _get_locked(self):
        return self.is_locked

    def _set_locked(self, locked):
        self.is_locked = locked

    # Alias "is_locked" to "locked" for DB column mapping
    locked = property(fget=_get_locked, fset=_set_locked)

    def _get_channel_id(self):
        return self.channel.id

    def _set_channel_id(self, channel_id):
        assert self.channel.id == channel_id

    # Accessor for the "channel_id" DB column
    channel_id = property(fget=_get_channel_id, fset=_set_channel_id)

    @staticmethod
    def sort_by_pubdate(episodes, reverse=False):
        """Sort a list of PodcastEpisode objects chronologically

        Returns an iterable, sorted sequence of the episodes
        """
        key_pubdate = lambda e: e.pubDate
        return sorted(episodes, key=key_pubdate, reverse=reverse)

    def reload_from_db(self):
        """
        Re-reads all episode details for this object from the
        database and updates this object accordingly. Can be
        used to refresh existing objects when the database has
        been updated (e.g. the filename has been set after a
        download where it was not set before the download)
        """
        d = self.db.load_episode(self.id)
        self.update_from_dict(d or {})
        return self

    def has_website_link(self):
        return bool(self.link) and (self.link != self.url)

    @staticmethod
    def from_feedparser_entry(entry, channel):
        episode = PodcastEpisode(channel)

        episode.title = entry.get('title', '')
        episode.link = entry.get('link', '')
        episode.description = entry.get('summary', '')

        # Fallback to subtitle if summary is not available
        if not episode.description:
            episode.description = entry.get('subtitle', '')

        episode.guid = entry.get('id', '')
        if entry.get('updated_parsed', None):
            episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

        # Enclosures
        for e in entry.get('enclosures', ()):
            episode.mimetype = e.get('type', 'application/octet-stream')
            if episode.mimetype == '':
                # See Maemo bug 10036
                log('Fixing empty mimetype in ugly feed', sender=episode)
                episode.mimetype = 'application/octet-stream'

            if '/' not in episode.mimetype:
                continue

            episode.url = util.normalize_feed_url(e.get('href', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(e.length) or -1
            except:
                episode.length = -1

            return episode

        # Media RSS content
        for m in entry.get('media_content', ()):
            episode.mimetype = m.get('type', 'application/octet-stream')
            if '/' not in episode.mimetype:
                continue

            episode.url = util.normalize_feed_url(m.get('url', ''))
            if not episode.url:
                continue

            try:
                episode.length = int(m.fileSize) or -1
            except:
                episode.length = -1

            return episode

        # Brute-force detection of any links
        for l in entry.get('links', ()):
            episode.url = util.normalize_feed_url(l.get('href', ''))
            if not episode.url:
                continue

            if youtube.is_video_link(episode.url):
                return episode

            # Check if we can resolve this link to an audio/video file
            filename, extension = util.filename_from_url(episode.url)
            file_type = util.file_type_by_extension(extension)
            if file_type is None and hasattr(l, 'type'):
                extension = util.extension_from_mimetype(l.type)
                file_type = util.file_type_by_extension(extension)

            # The link points to an audio or video file - use it!
            if file_type is not None:
                return episode

        # Scan MP3 links in description text
        mp3s = re.compile(r'http://[^"]*\.mp3')
        for content in entry.get('content', ()):
            html = content.value
            for match in mp3s.finditer(html):
                episode.url = match.group(0)
                return episode

        return None
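
    # Example (sketch): a minimal entry, as parsed by feedparser (entries are
    # FeedParserDict objects, so values are reachable both as keys and as
    # attributes), that the enclosure branch above turns into an episode.
    # All values are made up for illustration:
    #
    #     entry.title = 'Episode 1'
    #     entry.id = 'http://example.org/ep1'
    #     entry.updated_parsed = (2010, 1, 1, 0, 0, 0, 4, 1, 0)
    #     entry.enclosures = [{'href': 'http://example.org/ep1.mp3',
    #                          'type': 'audio/mpeg', 'length': '12345678'}]
    #
    #     episode = PodcastEpisode.from_feedparser_entry(entry, channel)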

    def __init__(self, channel):
        self.db = channel.db
        # Used by Storage for faster saving
        self.id = None
        self.url = ''
        self.title = ''
        self.length = 0
        self.mimetype = 'application/octet-stream'
        self.guid = ''
        self.description = ''
        self.link = ''
        self.channel = channel
        self.pubDate = 0
        self.filename = None
        self.auto_filename = 1 # automatically generated filename

        self.state = gpodder.STATE_NORMAL
        self.is_played = False

        # Initialize the "is_locked" property
        self._is_locked = False
        self.is_locked = channel.channel_is_locked

        # Time attributes
        self.total_time = 0
        self.current_position = 0
        self.current_position_updated = time.time()

    def get_is_locked(self):
        return self._is_locked

    def set_is_locked(self, is_locked):
        self._is_locked = bool(is_locked)

    is_locked = property(fget=get_is_locked, fset=set_is_locked)

    def save(self):
        if self.state != gpodder.STATE_DOWNLOADED and self.file_exists():
            self.state = gpodder.STATE_DOWNLOADED
        self.db.save_episode(self)

    def on_downloaded(self, filename):
        self.state = gpodder.STATE_DOWNLOADED
        self.is_played = False
        self.length = os.path.getsize(filename)
        self.db.save_downloaded_episode(self)
        self.db.commit()

    def set_state(self, state):
        self.state = state
        self.db.update_episode_state(self)

    def mark(self, state=None, is_played=None, is_locked=None):
        if state is not None:
            self.state = state
        if is_played is not None:
            self.is_played = is_played
        if is_locked is not None:
            self.is_locked = is_locked
        self.db.update_episode_state(self)

    @property
    def title_markup(self):
        return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
                xml.sax.saxutils.escape(self.channel.title))

    @property
    def maemo_markup(self):
        if self.length > 0:
            length_str = '%s; ' % self.filesize_prop
        else:
            length_str = ''
        return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(self.title), \
                xml.sax.saxutils.escape(length_str), \
                xml.sax.saxutils.escape(self.pubdate_prop), \
                xml.sax.saxutils.escape(self.channel.title))

    @property
    def maemo_remove_markup(self):
        if self.is_played:
            played_string = _('played')
        else:
            played_string = _('unplayed')
        downloaded_string = self.get_age_string()
        if not downloaded_string:
            downloaded_string = _('today')
        return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
                '; '+_('from %s')+'</small>') % (\
                xml.sax.saxutils.escape(self.title), \
                xml.sax.saxutils.escape(self.filesize_prop), \
                xml.sax.saxutils.escape(played_string), \
                xml.sax.saxutils.escape(downloaded_string), \
                xml.sax.saxutils.escape(self.channel.title))

    def age_in_days(self):
        return util.file_age_in_days(self.local_filename(create=False, \
                check_only=True))

    def get_age_string(self):
        return util.file_age_to_string(self.age_in_days())

    age_prop = property(fget=get_age_string)

    def one_line_description(self):
        lines = util.remove_html_tags(self.description).strip().splitlines()
        if not lines or lines[0] == '':
            return _('No description available')
        else:
            return ' '.join(lines)

    def delete_from_disk(self):
        try:
            self.channel.delete_episode(self)
        except:
            log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)

    def find_unique_file_name(self, url, filename, extension):
        current_try = util.sanitize_filename(filename, self.MAX_FILENAME_LENGTH)+extension
        next_try_id = 2
        lookup_url = None

        if self.filename == current_try and current_try is not None:
            # We already have this filename - good!
            return current_try

        while self.db.episode_filename_exists(current_try):
            current_try = '%s (%d)%s' % (filename, next_try_id, extension)
            next_try_id += 1

        return current_try

    def local_filename(self, create, force_update=False, check_only=False,
            template=None):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if it would be a good idea. Use this if
        you only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.
        """
        ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

        # For compatibility with already-downloaded episodes, we
        # have to know md5 filenames if they are downloaded already
        urldigest = hashlib.md5(self.url).hexdigest()

        if not create and self.filename is None:
            urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
            if os.path.exists(urldigest_filename):
                # The file exists, so set it up in our database
                log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
                self.filename = urldigest+ext
                self.auto_filename = 1
                self.save()
                return urldigest_filename
            return None

        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if check_only:
            if self.filename is None:
                return None
            else:
                return os.path.join(self.channel.save_dir, self.filename)

        if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, extension_UNUSED = util.filename_from_url(self.url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            if 'redirect' in fn_template and template is None:
                # This looks like a redirection URL - force URL resolving!
                log('Looks like a redirection to me: %s', self.url, sender=self)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                log('Redirection resolved to: %s', url, sender=self)
                (episode_filename, extension_UNUSED) = util.filename_from_url(url)
                fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

            # Use the video title for YouTube downloads
            for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
                if self.url.startswith(yt_url):
                    fn_template = util.sanitize_filename(os.path.basename(self.title), self.MAX_FILENAME_LENGTH)

            # If the basename is empty, use the md5 hexdigest of the URL
            if len(fn_template) == 0 or fn_template.startswith('redirect.'):
                log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
                fn_template = urldigest

            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

            # We populate the filename field the first time - does the old file still exist?
            if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
                log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
                self.filename = urldigest+ext

            # The old file exists, but we have decided to want a different filename
            if self.filename is not None and wanted_filename != self.filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    log('Choosing new filename: %s', new_file_name, sender=self)
                else:
                    log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
                log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
            elif self.filename is None:
                log('Setting filename to "%s".', wanted_filename, sender=self)
            else:
                log('Should update filename. Stays the same (%s). Good!', \
                        wanted_filename, sender=self)
            self.filename = wanted_filename
            self.save()
            self.db.commit()

        return os.path.join(self.channel.save_dir, self.filename)
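
    # Example (sketch) of how the flags above are typically combined; the
    # names are placeholders:
    #
    #     path = episode.local_filename(create=False, check_only=True)
    #     if path is None:
    #         # Not downloaded yet - generate a name before downloading
    #         path = episode.local_filename(create=True)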

    def set_mimetype(self, mimetype, commit=False):
        """Sets the mimetype for this episode"""
        self.mimetype = mimetype
        if commit:
            self.db.commit()

    def extension(self, may_call_local_filename=True):
        filename, ext = util.filename_from_url(self.url)
        if may_call_local_filename:
            filename = self.local_filename(create=False)
            if filename is not None:
                filename, ext = os.path.splitext(filename)
        # if we can't detect the extension from the URL, fall back to the mimetype
        if ext == '' or util.file_type_by_extension(ext) is None:
            ext = util.extension_from_mimetype(self.mimetype)
        return ext

    def check_is_new(self, downloading=lambda e: False):
        """
        Returns True if this episode is to be considered new.
        "Downloading" should be a callback that gets an episode
        as its parameter and returns True if the episode is
        being downloaded at the moment.
        """
        return self.state == gpodder.STATE_NORMAL and \
                not self.is_played and \
                not downloading(self)

    def mark_new(self):
        self.state = gpodder.STATE_NORMAL
        self.is_played = False
        self.db.update_episode_state(self)

    def mark_old(self):
        self.is_played = True
        self.db.update_episode_state(self)

    def file_exists(self):
        filename = self.local_filename(create=False, check_only=True)
        if filename is None:
            return False
        else:
            return os.path.exists(filename)

    def was_downloaded(self, and_exists=False):
        if self.state != gpodder.STATE_DOWNLOADED:
            return False
        if and_exists and not self.file_exists():
            return False
        return True

    def sync_filename(self, use_custom=False, custom_format=None):
        if use_custom:
            return util.object_string_formatter(custom_format,
                    episode=self, podcast=self.channel)
        else:
            return self.title

    def file_type(self):
        # Assume all YouTube links are video files
        if youtube.is_video_link(self.url):
            return 'video'

        return util.file_type_by_extension(self.extension())

    @property
    def basename(self):
        return os.path.splitext(os.path.basename(self.url))[0]

    @property
    def published(self):
        """
        Returns published date as YYYYMMDD (or 00000000 if not available)
        """
        try:
            return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
        except:
            log('Cannot format pubDate for "%s".', self.title, sender=self)
            return '00000000'

    @property
    def pubtime(self):
        """
        Returns published time as HHMM (or 0000 if not available)
        """
        try:
            return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
        except:
            log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
            return '0000'

    def cute_pubdate(self):
        result = util.format_date(self.pubDate)
        if result is None:
            return '(%s)' % _('unknown')
        else:
            return result

    pubdate_prop = property(fget=cute_pubdate)

    def calculate_filesize(self):
        filename = self.local_filename(create=False)
        if filename is None:
            log('calculate_filesize called, but filename is None!', sender=self)
        try:
            self.length = os.path.getsize(filename)
        except:
            log('Could not get filesize for %s.', self.url)

    def get_filesize_string(self):
        return util.format_filesize(self.length)

    filesize_prop = property(fget=get_filesize_string)

    def get_played_string(self):
        if not self.is_played:
            return _('Unplayed')

        return ''

    played_prop = property(fget=get_played_string)

    def is_duplicate(self, episode):
        if self.title == episode.title and self.pubDate == episode.pubDate:
            log('Possible duplicate detected: %s', self.title)
            return True
        return False

    def duplicate_id(self):
        return hash((self.title, self.pubDate))

    def update_from(self, episode):
        for k in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
            setattr(self, k, getattr(episode, k))