fix #648 Checking for new episodes clears file_size for some downloaded files
[gpodder.git] / src / gpodder / model.py
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2018 The gPodder Team
5 # Copyright (c) 2011 Neal H. Walfield
7 # gPodder is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # gPodder is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
23 # gpodder.model - Core model classes for gPodder (2009-08-13)
24 # Based on libpodcasts.py (thp, 2005-10-29)
27 import collections
28 import datetime
29 import glob
30 import hashlib
31 import logging
32 import os
33 import re
34 import shutil
35 import string
36 import time
38 import gpodder
39 import podcastparser
40 from gpodder import (coverart, escapist_videos, feedcore, schema, util, vimeo,
41 youtube)
43 logger = logging.getLogger(__name__)
45 _ = gpodder.gettext
48 class Feed:
49 """ abstract class for presenting a parsed feed to PodcastChannel """
51 def get_title(self):
52 """ :return str: the feed's title """
53 return None
55 def get_link(self):
56 """ :return str: link to the feed's website """
57 return None
59 def get_description(self):
60 """ :return str: feed's textual description """
61 return None
63 def get_cover_url(self):
64 """ :return str: url of the feed's cover image """
65 return None
67 def get_payment_url(self):
68 """ :return str: optional -- feed's payment url """
69 return None
71 def get_http_etag(self):
72 """ :return str: optional -- last HTTP etag header, for conditional request next time """
73 return None
75 def get_http_last_modified(self):
76 """ :return str: optional -- last HTTP Last-Modified header, for conditional request next time """
77 return None
79 def get_new_episodes(self, channel, existing_guids):
80 """
81 Produce new episodes and update old ones.
82 Feed is a class that presents already-fetched results, so the feed must have been fetched before calling this.
83 Existing episodes not in all_seen_guids will be purged from the database.
84 :param PodcastChannel channel: the updated channel
85 :param dict(str, PodcastEpisode) existing_guids: existing episodes, by guid
86 :return (list(PodcastEpisode), set(str)): new_episodes, all_seen_guids
87 """
88 return ([], set())
90 def get_next_page(self, channel, max_episodes):
91 """
92 Paginated feed support (RFC 5005).
93 If the feed is paged, return the next feed page.
94 Returned page will in turn be asked for the next page, until None is returned.
95 :return feedcore.Result: the next feed's page,
96 as a fully parsed Feed or None
97 """
98 return None
101 class PodcastParserFeed(Feed):
102 def __init__(self, feed, fetcher, max_episodes=0):
103 self.feed = feed
104 self.fetcher = fetcher
105 self.max_episodes = max_episodes
107 def get_title(self):
108 return self.feed.get('title')
110 def get_link(self):
111 return self.feed.get('link')
113 def get_description(self):
114 return self.feed.get('description')
116 def get_cover_url(self):
117 return self.feed.get('cover_url')
119 def get_payment_url(self):
120 return self.feed.get('payment_url')
122 def get_http_etag(self):
123 return self.feed.get('headers', {}).get('etag')
125 def get_http_last_modified(self):
126 return self.feed.get('headers', {}).get('last-modified')
128 def get_new_episodes(self, channel, existing_guids):
129 # Keep track of episode GUIDs currently seen in the feed
130 seen_guids = set()
132 # list of new episodes
133 new_episodes = []
135 # We have to sort the entries in descending chronological order,
136 # because if the feed lists items in ascending order and has >
137 # max_episodes old episodes, new episodes will not be shown.
138 # See also: gPodder Bug 1186
139 entries = sorted(self.feed.get('episodes', []), key=lambda episode: episode['published'], reverse=True)
141 # We can limit the maximum number of entries that gPodder will parse
142 if self.max_episodes > 0 and len(entries) > self.max_episodes:
143 entries = entries[:self.max_episodes]
145 # Search all entries for new episodes
146 for entry in entries:
147 episode = channel.EpisodeClass.from_podcastparser_entry(entry, channel)
148 if episode is None:
149 continue
151 seen_guids.add(episode.guid)
152 # Detect (and update) existing episode based on GUIDs
153 existing_episode = existing_guids.get(episode.guid, None)
154 if existing_episode:
155 existing_episode.update_from(episode)
156 existing_episode.save()
157 continue
159 episode.save()
160 new_episodes.append(episode)
161 return new_episodes, seen_guids
163 def get_next_page(self, channel, max_episodes):
164 if 'paged_feed_next' in self.feed:
165 url = self.feed['paged_feed_next']
166 logger.debug("get_next_page: feed has next %s", url)
167 url = channel.authenticate_url(url)
168 res = self.fetcher.fetch(url, max_episodes=max_episodes)
169 if res.status == feedcore.UPDATED_FEED:
170 res.feed = PodcastParserFeed(res.feed, self.fetcher, max_episodes)
171 return res
172 return None
175 class gPodderFetcher(feedcore.Fetcher):
176 """
177 This class extends the feedcore Fetcher with the gPodder User-Agent and the
178 Proxy handler based on the current settings in gPodder.
179 """
180 custom_handlers = []
182 def fetch_channel(self, channel, max_episodes):
183 for handler in self.custom_handlers:
184 custom_feed = handler.fetch_channel(channel, max_episodes)
185 if custom_feed is not None:
186 return custom_feed
187 # If we have a username or password, rebuild the url with them included
188 # Note: using an HTTPBasicAuthHandler would be a pain because we need to
189 # know the realm. It can be done, but I think this method works, too
190 url = channel.authenticate_url(channel.url)
191 res = self.fetch(url, channel.http_etag, channel.http_last_modified, max_episodes)
192 if res.status == feedcore.UPDATED_FEED:
193 res.feed = PodcastParserFeed(res.feed, self, max_episodes)
194 return res
196 def _resolve_url(self, url):
197 url = youtube.get_real_channel_url(url)
198 url = vimeo.get_real_channel_url(url)
199 url = escapist_videos.get_real_channel_url(url)
200 return url
202 @classmethod
203 def register(cls, handler):
204 cls.custom_handlers.append(handler)
206 @classmethod
207 def unregister(cls, handler):
208 cls.custom_handlers.remove(handler)
211 # The "register" method is exposed here for external usage
212 register_custom_handler = gPodderFetcher.register
214 # The "unregister" method is exposed here for external usage
215 unregister_custom_handler = gPodderFetcher.unregister
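# Illustrative sketch of a custom handler (the names below are hypothetical
# and not part of gPodder): an extension provides an object whose
# fetch_channel() returns None to fall through to the default fetcher, or a
# result object exposing .status and .feed carrying a Feed subclass (the
# feedcore.Result constructor shown here is an assumption):
#
#     class ExampleHandler:
#         def fetch_channel(self, channel, max_episodes):
#             if not channel.url.startswith('example://'):
#                 return None
#             feed = build_example_feed(channel.url)  # some Feed subclass
#             return feedcore.Result(feedcore.UPDATED_FEED, feed)
#
#     register_custom_handler(ExampleHandler())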
217 # Our podcast model:
219 # database -> podcast -> episode -> download/playback
220 # podcast.parent == db
221 # podcast.children == [episode, ...]
222 # episode.parent == podcast
224 # - normally: episode.children = (None, None)
225 # - downloading: episode.children = (DownloadTask(), None)
226 # - playback: episode.children = (None, PlaybackTask())
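# For illustration only (assuming a Model instance named "model"), the
# hierarchy is typically walked like this:
#
#     for podcast in model.get_podcasts():
#         for episode in podcast.get_all_episodes():
#             print(episode.title, episode.published)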
229 class PodcastModelObject(object):
230 """
231 A generic base class for our podcast model providing common helper
232 and utility functions.
233 """
234 __slots__ = ('id', 'parent', 'children')
236 @classmethod
237 def create_from_dict(cls, d, *args):
238 """
239 Create a new object, passing "args" to the constructor
240 and then updating the object with the values from "d".
241 """
242 o = cls(*args)
244 # XXX: all(map(lambda k: hasattr(o, k), d))?
245 for k, v in d.items():
246 setattr(o, k, v)
248 return o
251 class PodcastEpisode(PodcastModelObject):
252 """holds data for one object in a channel"""
253 # In theory, Linux can have 255 bytes (not characters!) in a filename, but
254 # filesystems like eCryptFS store metadata in the filename, making the
255 # effective number of characters less than that. eCryptFS recommends
256 # 140 chars, we use 120 here (140 - len(extension) - len(".partial")).
257 # References: gPodder bug 1898, http://unix.stackexchange.com/a/32834
258 MAX_FILENAME_LENGTH = 120 # without extension
259 MAX_FILENAME_WITH_EXT_LENGTH = 140 - len(".partial") # with extension
261 __slots__ = schema.EpisodeColumns
263 def _deprecated(self):
264 raise Exception('Property is deprecated!')
266 is_played = property(fget=_deprecated, fset=_deprecated)
267 is_locked = property(fget=_deprecated, fset=_deprecated)
269 def has_website_link(self):
270 return bool(self.link) and (self.link != self.url or
271 youtube.is_video_link(self.link))
273 @classmethod
274 def from_podcastparser_entry(cls, entry, channel):
275 episode = cls(channel)
276 episode.guid = entry['guid']
277 episode.title = entry['title']
278 episode.link = entry['link']
279 episode.description = entry['description']
280 if entry.get('description_html'):
281 episode.description_html = entry['description_html']
283 episode.total_time = entry['total_time']
284 episode.published = entry['published']
285 episode.payment_url = entry['payment_url']
287 audio_available = any(enclosure['mime_type'].startswith('audio/') for enclosure in entry['enclosures'])
288 video_available = any(enclosure['mime_type'].startswith('video/') for enclosure in entry['enclosures'])
290 for enclosure in entry['enclosures']:
291 episode.mime_type = enclosure['mime_type']
293 # Skip images in feeds if audio or video is available (bug 979)
294 # This must (and does) also look in Media RSS enclosures (bug 1430)
295 if episode.mime_type.startswith('image/') and (audio_available or video_available):
296 continue
298 # If we have audio or video available later on, skip
299 # 'application/octet-stream' data types (fixes Linux Outlaws)
300 if episode.mime_type == 'application/octet-stream' and (audio_available or video_available):
301 continue
303 episode.url = util.normalize_feed_url(enclosure['url'])
304 if not episode.url:
305 continue
307 episode.file_size = enclosure['file_size']
308 return episode
310 # Brute-force detection of the episode link
311 episode.url = util.normalize_feed_url(entry['link'])
312 if not episode.url:
313 return None
315 if any(mod.is_video_link(episode.url) for mod in (youtube, vimeo, escapist_videos)):
316 return episode
318 # Check if we can resolve this link to an audio/video file
319 filename, extension = util.filename_from_url(episode.url)
320 file_type = util.file_type_by_extension(extension)
322 # The link points to an audio or video file - use it!
323 if file_type is not None:
324 return episode
326 return None
328 def __init__(self, channel):
329 self.parent = channel
330 self.podcast_id = self.parent.id
331 self.children = (None, None)
333 self.id = None
334 self.url = ''
335 self.title = ''
336 self.file_size = 0
337 self.mime_type = 'application/octet-stream'
338 self.guid = ''
339 self.description = ''
340 self.description_html = ''
341 self.link = ''
342 self.published = 0
343 self.download_filename = None
344 self.payment_url = None
346 self.state = gpodder.STATE_NORMAL
347 self.is_new = True
348 self.archive = channel.auto_archive_episodes
350 # Time attributes
351 self.total_time = 0
352 self.current_position = 0
353 self.current_position_updated = 0
355 # Timestamp of last playback time
356 self.last_playback = 0
358 @property
359 def channel(self):
360 return self.parent
362 @property
363 def db(self):
364 return self.parent.parent.db
366 @property
367 def trimmed_title(self):
368 """Return the title with the common prefix trimmed"""
369 # Minimum amount of leftover characters after trimming. This
370 # avoids things like "Common prefix 123" to become just "123".
371 # If there are LEFTOVER_MIN or less characters after trimming,
372 # the original title will be returned without trimming.
373 LEFTOVER_MIN = 5
375 # "Podcast Name - Title" and "Podcast Name: Title" -> "Title"
376 for postfix in (' - ', ': '):
377 prefix = self.parent.title + postfix
378 if (self.title.startswith(prefix) and
379 len(self.title) - len(prefix) > LEFTOVER_MIN):
380 return self.title[len(prefix):]
382 regex_patterns = [
383 # "Podcast Name <number>: ..." -> "<number>: ..."
384 r'^%s (\d+: .*)' % re.escape(self.parent.title),
386 # "Episode <number>: ..." -> "<number>: ..."
387 r'Episode (\d+:.*)',
388 ]
390 for pattern in regex_patterns:
391 if re.match(pattern, self.title):
392 title = re.sub(pattern, r'\1', self.title)
393 if len(title) > LEFTOVER_MIN:
394 return title
396 # "#001: Title" -> "001: Title"
397 if (
398 not self.parent._common_prefix and
399 re.match('^#\d+: ', self.title) and
400 len(self.title) - 1 > LEFTOVER_MIN):
401 return self.title[1:]
403 if (self.parent._common_prefix is not None and
404 self.title.startswith(self.parent._common_prefix) and
405 len(self.title) - len(self.parent._common_prefix) > LEFTOVER_MIN):
406 return self.title[len(self.parent._common_prefix):]
408 return self.title
410 def _set_download_task(self, download_task):
411 self.children = (download_task, self.children[1])
413 def _get_download_task(self):
414 return self.children[0]
416 download_task = property(_get_download_task, _set_download_task)
418 @property
419 def downloading(self):
420 task = self.download_task
421 if task is None:
422 return False
424 return task.status in (task.DOWNLOADING, task.QUEUED, task.PAUSED)
426 def check_is_new(self):
427 return (self.state == gpodder.STATE_NORMAL and self.is_new and
428 not self.downloading)
430 def save(self):
431 gpodder.user_extensions.on_episode_save(self)
432 self.db.save_episode(self)
434 def on_downloaded(self, filename):
435 self.state = gpodder.STATE_DOWNLOADED
436 self.is_new = True
437 self.file_size = os.path.getsize(filename)
438 self.save()
440 def set_state(self, state):
441 self.state = state
442 self.save()
444 def playback_mark(self):
445 self.is_new = False
446 self.last_playback = int(time.time())
447 gpodder.user_extensions.on_episode_playback(self)
448 self.save()
450 def mark(self, state=None, is_played=None, is_locked=None):
451 if state is not None:
452 self.state = state
453 if is_played is not None:
454 self.is_new = not is_played
456 # "Mark as new" must "undelete" the episode
457 if self.is_new and self.state == gpodder.STATE_DELETED:
458 self.state = gpodder.STATE_NORMAL
459 if is_locked is not None:
460 self.archive = is_locked
461 self.save()
463 def age_in_days(self):
464 return util.file_age_in_days(self.local_filename(create=False,
465 check_only=True))
467 age_int_prop = property(fget=age_in_days)
469 def get_age_string(self):
470 return util.file_age_to_string(self.age_in_days())
472 age_prop = property(fget=get_age_string)
474 def one_line_description(self):
475 MAX_LINE_LENGTH = 120
476 desc = util.remove_html_tags(self.description or '')
477 desc = re.sub('\s+', ' ', desc).strip()
478 if not desc:
479 return _('No description available')
480 else:
481 # Decode the description to avoid gPodder bug 1277
482 desc = util.convert_bytes(desc).strip()
484 if len(desc) > MAX_LINE_LENGTH:
485 return desc[:MAX_LINE_LENGTH] + '...'
486 else:
487 return desc
489 def delete_from_disk(self):
490 filename = self.local_filename(create=False, check_only=True)
491 if filename is not None:
492 gpodder.user_extensions.on_episode_delete(self, filename)
493 util.delete_file(filename)
495 self.set_state(gpodder.STATE_DELETED)
497 def get_playback_url(self, fmt_ids=None, vimeo_fmt=None, allow_partial=False):
498 """Local (or remote) playback/streaming filename/URL
500 Returns either the local filename or a streaming URL that
501 can be used to playback this episode.
503 Also returns the filename of a partially downloaded file
504 in case partial (preview) playback is desired.
505 """
506 url = self.local_filename(create=False)
508 if (allow_partial and url is not None and
509 os.path.exists(url + '.partial')):
510 return url + '.partial'
512 if url is None or not os.path.exists(url):
513 url = self.url
514 url = youtube.get_real_download_url(url, fmt_ids)
515 url = vimeo.get_real_download_url(url, vimeo_fmt)
516 url = escapist_videos.get_real_download_url(url)
518 return url
520 def find_unique_file_name(self, filename, extension):
521 # Remove leading and trailing whitespace + dots (to avoid hidden files)
522 filename = filename.strip('.' + string.whitespace) + extension
524 for name in util.generate_names(filename):
525 if (not self.db.episode_filename_exists(self.podcast_id, name) or
526 self.download_filename == name):
527 return name
529 def local_filename(self, create, force_update=False, check_only=False,
530 template=None, return_wanted_filename=False):
531 """Get (and possibly generate) the local saving filename
533 Pass create=True if you want this function to generate a
534 new filename if none exists. You only want to do this when
535 planning to create/download the file after calling this function.
537 Normally, you should pass create=False. This will only
538 create a filename when the file already exists from a previous
539 version of gPodder (where we used md5 filenames). If the file
540 does not exist (and the filename also does not exist), this
541 function will return None.
543 If you pass force_update=True to this function, it will try to
544 find a new (better) filename and move the current file if this
545 is the case. This is useful if (during the download) you get
546 more information about the file, e.g. the mimetype and you want
547 to include this information in the file name generation process.
549 If check_only=True is passed to this function, it will never try
550 to rename the file, even if would be a good idea. Use this if you
551 only want to check if a file exists.
553 If "template" is specified, it should be a filename that is to
554 be used as a template for generating the "real" filename.
556 The generated filename is stored in the database for future access.
558 If return_wanted_filename is True, the filename will not be written to
559 the database, but simply returned by this function (for use by the
560 "import external downloads" feature).
562 if self.download_filename is None and (check_only or not create):
563 return None
565 ext = self.extension(may_call_local_filename=False)
567 if not check_only and (force_update or not self.download_filename):
568 # Avoid and catch gPodder bug 1440 and similar situations
569 if template == '':
570 logger.warn('Empty template. Report this podcast URL %s',
571 self.channel.url)
572 template = None
574 # Try to find a new filename for the current file
575 if template is not None:
576 # If template is specified, trust the template's extension
577 episode_filename, ext = os.path.splitext(template)
578 else:
579 episode_filename, _ = util.filename_from_url(self.url)
581 if 'redirect' in episode_filename and template is None:
582 # This looks like a redirection URL - force URL resolving!
583 logger.warn('Looks like a redirection to me: %s', self.url)
584 url = util.get_real_url(self.channel.authenticate_url(self.url))
585 logger.info('Redirection resolved to: %s', url)
586 episode_filename, _ = util.filename_from_url(url)
588 # Use title for YouTube, Vimeo and Soundcloud downloads
589 if (youtube.is_video_link(self.url) or
590 vimeo.is_video_link(self.url) or
591 escapist_videos.is_video_link(self.url) or
592 episode_filename == 'stream'):
593 episode_filename = self.title
595 # If the basename is empty, use the md5 hexdigest of the URL
596 if not episode_filename or episode_filename.startswith('redirect.'):
597 logger.error('Report this feed: Podcast %s, episode %s',
598 self.channel.url, self.url)
599 episode_filename = hashlib.md5(self.url.encode('utf-8')).hexdigest()
601 # Also sanitize ext (see #591 where ext=.mp3?dest-id=754182)
602 fn_template, ext = util.sanitize_filename_ext(
603 episode_filename,
604 ext,
605 self.MAX_FILENAME_LENGTH,
606 self.MAX_FILENAME_WITH_EXT_LENGTH)
607 # Find a unique filename for this episode
608 wanted_filename = self.find_unique_file_name(fn_template, ext)
610 if return_wanted_filename:
611 # return the calculated filename without updating the database
612 return wanted_filename
614 # The old file exists, but we have decided to want a different filename
615 if self.download_filename and wanted_filename != self.download_filename:
616 # there might be an old download folder crawling around - move it!
617 new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
618 old_file_name = os.path.join(self.channel.save_dir, self.download_filename)
619 if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
620 logger.info('Renaming %s => %s', old_file_name, new_file_name)
621 os.rename(old_file_name, new_file_name)
622 elif force_update and not os.path.exists(old_file_name):
623 # When we call force_update, the file might not yet exist when we
624 # call it from the downloading code before saving the file
625 logger.info('Choosing new filename: %s', new_file_name)
626 else:
627 logger.warn('%s exists or %s does not', new_file_name, old_file_name)
628 logger.info('Updating filename of %s to "%s".', self.url, wanted_filename)
629 elif self.download_filename is None:
630 logger.info('Setting download filename: %s', wanted_filename)
631 self.download_filename = wanted_filename
632 self.save()
634 if return_wanted_filename:
635 # return the filename, not full path
636 return self.download_filename
637 return os.path.join(self.channel.save_dir, self.download_filename)
639 def extension(self, may_call_local_filename=True):
640 filename, ext = util.filename_from_url(self.url)
641 if may_call_local_filename:
642 filename = self.local_filename(create=False)
643 if filename is not None:
644 filename, ext = os.path.splitext(filename)
645 # if we can't detect the extension from the URL, fall back on the mimetype
646 if ext == '' or util.file_type_by_extension(ext) is None:
647 ext = util.extension_from_mimetype(self.mime_type)
648 return ext
650 def mark_new(self):
651 self.is_new = True
652 self.save()
654 def mark_old(self):
655 self.is_new = False
656 self.save()
658 def file_exists(self):
659 filename = self.local_filename(create=False, check_only=True)
660 if filename is None:
661 return False
662 else:
663 return os.path.exists(filename)
665 def was_downloaded(self, and_exists=False):
666 if self.state != gpodder.STATE_DOWNLOADED:
667 return False
668 if and_exists and not self.file_exists():
669 return False
670 return True
672 def sync_filename(self, use_custom=False, custom_format=None):
673 if use_custom:
674 return util.object_string_formatter(custom_format,
675 episode=self, podcast=self.channel)
676 else:
677 return self.title
679 def file_type(self):
680 # Assume all YouTube/Vimeo links are video files
681 if youtube.is_video_link(self.url) or vimeo.is_video_link(self.url) or escapist_videos.is_video_link(self.url):
682 return 'video'
684 return util.file_type_by_extension(self.extension())
686 @property
687 def basename(self):
688 return os.path.splitext(os.path.basename(self.url))[0]
690 @property
691 def pubtime(self):
692 """
693 Returns published time as HHMM (or 0000 if not available)
694 """
695 try:
696 return datetime.datetime.fromtimestamp(self.published).strftime('%H%M')
697 except:
698 logger.warn('Cannot format pubtime: %s', self.title, exc_info=True)
699 return '0000'
701 def playlist_title(self):
702 """Return a title for this episode in a playlist
704 The title will be composed of the podcast name, the
705 episode name and the publication date. The return
706 value is the canonical representation of this episode
707 in playlists (for example, M3U playlists).
708 """
709 return '%s - %s (%s)' % (self.channel.title,
710 self.title,
711 self.cute_pubdate())
713 def cute_pubdate(self):
714 result = util.format_date(self.published)
715 if result is None:
716 return '(%s)' % _('unknown')
717 else:
718 return result
720 pubdate_prop = property(fget=cute_pubdate)
722 def published_datetime(self):
723 return datetime.datetime.fromtimestamp(self.published)
725 @property
726 def sortdate(self):
727 return self.published_datetime().strftime('%Y-%m-%d')
729 @property
730 def pubdate_day(self):
731 return self.published_datetime().strftime('%d')
733 @property
734 def pubdate_month(self):
735 return self.published_datetime().strftime('%m')
737 @property
738 def pubdate_year(self):
739 return self.published_datetime().strftime('%y')
741 def is_finished(self):
742 """Return True if this episode is considered "finished playing"
744 An episode is considered "finished" when there is a
745 current position mark on the track, and when the
746 current position is greater than 99 percent of the
747 total time or inside the last 10 seconds of a track.
748 """
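# For example, a position of 3592s in a 3600s episode counts as finished
# (within the last 10 seconds), as does 3580s (at least 99% of 3600 = 3564).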
749 return (self.current_position > 0 and self.total_time > 0 and
750 (self.current_position + 10 >= self.total_time or
751 self.current_position >= self.total_time * .99))
753 def get_play_info_string(self, duration_only=False):
754 duration = util.format_time(self.total_time)
755 if duration_only and self.total_time > 0:
756 return duration
757 elif self.is_finished():
758 return '%s (%s)' % (_('Finished'), duration)
759 elif self.current_position > 0 and \
760 self.current_position != self.total_time:
761 position = util.format_time(self.current_position)
762 return '%s / %s' % (position, duration)
763 elif self.total_time > 0:
764 return duration
765 else:
766 return '-'
768 def update_from(self, episode):
769 for k in ('title', 'url', 'description', 'description_html', 'link', 'published', 'guid', 'payment_url'):
770 setattr(self, k, getattr(episode, k))
771 # Don't overwrite file size on downloaded episodes
772 # See #648: refreshing a YouTube podcast clears the downloaded file size
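# (Illustration: on_downloaded() records the real on-disk size via
# os.path.getsize(); a later feed refresh may report 0 or a different value
# for the same enclosure, which would overwrite that stored size.)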
773 if self.state != gpodder.STATE_DOWNLOADED:
774 setattr(self, 'file_size', getattr(episode, 'file_size'))
777 class PodcastChannel(PodcastModelObject):
778 __slots__ = schema.PodcastColumns + ('_common_prefix',)
780 UNICODE_TRANSLATE = {ord('ö'): 'o', ord('ä'): 'a', ord('ü'): 'u'}
782 # Enumerations for download strategy
783 STRATEGY_DEFAULT, STRATEGY_LATEST = list(range(2))
785 # Description and ordering of strategies
786 STRATEGIES = [
787 (STRATEGY_DEFAULT, _('Default')),
788 (STRATEGY_LATEST, _('Only keep latest')),
789 ]
791 MAX_FOLDERNAME_LENGTH = 60
792 SECONDS_PER_DAY = 24 * 60 * 60
793 SECONDS_PER_WEEK = 7 * 24 * 60 * 60
794 EpisodeClass = PodcastEpisode
796 feed_fetcher = gPodderFetcher()
798 def __init__(self, model, id=None):
799 self.parent = model
800 self.children = []
802 self.id = id
803 self.url = None
804 self.title = ''
805 self.link = ''
806 self.description = ''
807 self.cover_url = None
808 self.payment_url = None
810 self.auth_username = ''
811 self.auth_password = ''
813 self.http_last_modified = None
814 self.http_etag = None
816 self.auto_archive_episodes = False
817 self.download_folder = None
818 self.pause_subscription = False
819 self.sync_to_mp3_player = True
820 self.cover_thumb = None
822 self.section = _('Other')
823 self._common_prefix = None
824 self.download_strategy = PodcastChannel.STRATEGY_DEFAULT
826 if self.id:
827 self.children = self.db.load_episodes(self, self.episode_factory)
828 self._determine_common_prefix()
830 @property
831 def model(self):
832 return self.parent
834 @property
835 def db(self):
836 return self.parent.db
838 def get_download_strategies(self):
839 for value, caption in PodcastChannel.STRATEGIES:
840 yield self.download_strategy == value, value, caption
842 def set_download_strategy(self, download_strategy):
843 if download_strategy == self.download_strategy:
844 return
846 caption = dict(self.STRATEGIES).get(download_strategy)
847 if caption is not None:
848 logger.debug('Strategy for %s changed to %s', self.title, caption)
849 self.download_strategy = download_strategy
850 else:
851 logger.warn('Cannot set strategy to %d', download_strategy)
853 def rewrite_url(self, new_url):
854 new_url = util.normalize_feed_url(new_url)
855 if new_url is None:
856 return None
858 self.url = new_url
859 self.http_etag = None
860 self.http_last_modified = None
861 self.save()
862 return new_url
864 def check_download_folder(self):
865 """Check the download folder for externally-downloaded files
867 This will try to assign downloaded files with episodes in the
868 database.
870 This will also cause missing files to be marked as deleted.
871 """
872 known_files = set()
874 for episode in self.get_episodes(gpodder.STATE_DOWNLOADED):
875 if episode.was_downloaded():
876 filename = episode.local_filename(create=False)
877 if filename is None:
878 # No filename has been determined for this episode
879 continue
881 if not os.path.exists(filename):
882 # File has been deleted by the user - simulate a
883 # delete event (also marks the episode as deleted)
884 logger.debug('Episode deleted: %s', filename)
885 episode.delete_from_disk()
886 continue
888 known_files.add(filename)
890 existing_files = set(filename for filename in
891 glob.glob(os.path.join(self.save_dir, '*'))
892 if not filename.endswith('.partial'))
894 ignore_files = ['folder' + ext for ext in
895 coverart.CoverDownloader.EXTENSIONS]
897 external_files = existing_files.difference(list(known_files) +
898 [os.path.join(self.save_dir, ignore_file)
899 for ignore_file in ignore_files])
900 if not external_files:
901 return
903 all_episodes = self.get_all_episodes()
905 for filename in external_files:
906 found = False
908 basename = os.path.basename(filename)
909 existing = [e for e in all_episodes if e.download_filename == basename]
910 if existing:
911 existing = existing[0]
912 logger.info('Importing external download: %s', filename)
913 existing.on_downloaded(filename)
914 continue
916 for episode in all_episodes:
917 wanted_filename = episode.local_filename(create=True,
918 return_wanted_filename=True)
919 if basename == wanted_filename:
920 logger.info('Importing external download: %s', filename)
921 episode.download_filename = basename
922 episode.on_downloaded(filename)
923 found = True
924 break
926 wanted_base, wanted_ext = os.path.splitext(wanted_filename)
927 target_base, target_ext = os.path.splitext(basename)
928 if wanted_base == target_base:
929 # Filenames only differ by the extension
930 wanted_type = util.file_type_by_extension(wanted_ext)
931 target_type = util.file_type_by_extension(target_ext)
933 # If wanted type is None, assume that we don't know
934 # the right extension before the download (e.g. YouTube)
935 # if the wanted type is the same as the target type,
936 # assume that it's the correct file
937 if wanted_type is None or wanted_type == target_type:
938 logger.info('Importing external download: %s', filename)
939 episode.download_filename = basename
940 episode.on_downloaded(filename)
941 found = True
942 break
944 if not found and not util.is_system_file(filename):
945 logger.warn('Unknown external file: %s', filename)
947 @classmethod
948 def sort_key(cls, podcast):
949 key = util.convert_bytes(podcast.title.lower())
950 return re.sub('^the ', '', key).translate(cls.UNICODE_TRANSLATE)
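# e.g. a podcast titled "The Tägliche Show" (hypothetical) would sort under
# "tagliche show": the leading "the " is dropped and umlauts are
# transliterated via UNICODE_TRANSLATE.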
952 @classmethod
953 def load(cls, model, url, create=True, authentication_tokens=None, max_episodes=0):
954 existing = [p for p in model.get_podcasts() if p.url == url]
956 if existing:
957 return existing[0]
959 if create:
960 tmp = cls(model)
961 tmp.url = url
962 if authentication_tokens is not None:
963 tmp.auth_username = authentication_tokens[0]
964 tmp.auth_password = authentication_tokens[1]
966 # Save podcast, so it gets an ID assigned before
967 # updating the feed and saving episodes
968 tmp.save()
970 try:
971 tmp.update(max_episodes)
972 except Exception as e:
973 logger.debug('Fetch failed. Removing buggy feed.')
974 tmp.remove_downloaded()
975 tmp.delete()
976 raise
978 # Determine the section in which this podcast should appear
979 tmp.section = tmp._get_content_type()
981 # Determine a new download folder now that we have the title
982 tmp.get_save_dir(force_new=True)
984 # Mark episodes as downloaded if files already exist (bug 902)
985 tmp.check_download_folder()
987 # Determine common prefix of episode titles
988 tmp._determine_common_prefix()
990 tmp.save()
992 gpodder.user_extensions.on_podcast_subscribe(tmp)
994 return tmp
996 def episode_factory(self, d):
997 """
998 This function takes a dictionary containing key-value pairs for
999 episodes and returns a new PodcastEpisode object that is connected
1000 to this object.
1002 Returns: A new PodcastEpisode object
1003 """
1004 return self.EpisodeClass.create_from_dict(d, self)
1006 def _consume_updated_title(self, new_title):
1007 # Replace multi-space and newlines with single space (Maemo bug 11173)
1008 new_title = re.sub('\s+', ' ', new_title).strip()
1010 # Only update the podcast-supplied title when we
1011 # don't yet have a title, or if the title is the
1012 # feed URL (e.g. we didn't find a title before).
1013 if not self.title or self.title == self.url:
1014 self.title = new_title
1016 # Start YouTube- and Vimeo-specific title FIX
1017 YOUTUBE_PREFIX = 'Uploads by '
1018 VIMEO_PREFIX = 'Vimeo / '
1019 if self.title.startswith(YOUTUBE_PREFIX):
1020 self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
1021 elif self.title.startswith(VIMEO_PREFIX):
1022 self.title = self.title[len(VIMEO_PREFIX):] + ' on Vimeo'
1023 # End YouTube- and Vimeo-specific title FIX
1025 def _consume_metadata(self, title, link, description, cover_url,
1026 payment_url):
1027 self._consume_updated_title(title)
1028 self.link = link
1029 self.description = description
1030 self.cover_url = cover_url
1031 self.payment_url = payment_url
1032 self.save()
1034 def _consume_updated_feed(self, feed, max_episodes=0):
1035 self._consume_metadata(feed.get_title() or self.url,
1036 feed.get_link() or self.link,
1037 feed.get_description() or '',
1038 feed.get_cover_url() or None,
1039 feed.get_payment_url() or None)
1041 # Update values for HTTP conditional requests
1042 self.http_etag = feed.get_http_etag() or self.http_etag
1043 self.http_last_modified = feed.get_http_last_modified() or self.http_last_modified
1045 # Load all episodes to update them properly.
1046 existing = self.get_all_episodes()
1047 # GUID-based existing episode list
1048 existing_guids = {e.guid: e for e in existing}
1050 # Get the most recent published timestamp of all episodes
1051 last_published = self.db.get_last_published(self) or 0
1052 # fix for #516 an episode was marked published one month in the future (typo in month number)
1053 # causing every new episode to be marked old
1054 tomorrow = datetime.datetime.now().timestamp() + self.SECONDS_PER_DAY
1055 if last_published > tomorrow:
1056 logger.debug('Episode published in the future for podcast %s', self.title)
1057 last_published = tomorrow
1059 # new episodes from feed
1060 new_episodes, seen_guids = feed.get_new_episodes(self, existing_guids)
1062 # pagination
1063 next_feed = feed
1064 next_max_episodes = max_episodes - len(seen_guids)
1065 # want to paginate if:
1066 # - we raised the max episode count so we want more old episodes now
1067 # FIXME: could also be that the feed has fewer episodes than max_episodes and we're paginating for nothing
1068 # - all episodes are new so we continue getting them until max_episodes is reached
1069 could_have_more = max_episodes > len(existing) or len(new_episodes) == len(seen_guids)
1070 while next_feed and could_have_more:
1071 if max_episodes > 0 and next_max_episodes <= 0:
1072 logger.debug("stopping pagination: seen enough episodes (%i)", max_episodes)
1073 break
1074 # brand new: try to load another page!
1075 next_result = next_feed.get_next_page(self, next_max_episodes)
1076 if next_result and next_result.status == feedcore.UPDATED_FEED:
1077 next_feed = next_result.feed
1078 for e in new_episodes:
1079 existing_guids[e.guid] = e
1080 next_new_episodes, next_seen_guids = next_feed.get_new_episodes(self, existing_guids)
1081 logger.debug("next page has %i new episodes", len(next_new_episodes))
1082 next_max_episodes -= len(next_seen_guids)
1083 new_episodes += next_new_episodes
1084 seen_guids = seen_guids.union(next_seen_guids)
1085 else:
1086 next_feed = None
1088 # mark episodes not new
1089 real_new_episode_count = 0
1090 # Search all entries for new episodes
1091 for episode in new_episodes:
1092 # Workaround for bug 340: If the episode has been
1093 # published earlier than one week before the most
1094 # recent existing episode, do not mark it as new.
1095 if episode.published < last_published - self.SECONDS_PER_WEEK:
1096 logger.debug('Episode with old date: %s', episode.title)
1097 episode.is_new = False
1098 episode.save()
1100 if episode.is_new:
1101 real_new_episode_count += 1
1103 # Only allow a certain number of new episodes per update
1104 if (self.download_strategy == PodcastChannel.STRATEGY_LATEST and
1105 real_new_episode_count > 1):
1106 episode.is_new = False
1107 episode.save()
1109 self.children.extend(new_episodes)
1111 self.remove_unreachable_episodes(existing, seen_guids, max_episodes)
1113 def remove_unreachable_episodes(self, existing, seen_guids, max_episodes):
1114 # Remove "unreachable" episodes - episodes that have not been
1115 # downloaded and that the feed does not list as downloadable anymore
1116 # Keep episodes that are currently being downloaded, though (bug 1534)
1117 if self.id is not None:
1118 episodes_to_purge = [e for e in existing if
1119 e.state != gpodder.STATE_DOWNLOADED and
1120 e.guid not in seen_guids and not e.downloading]
1122 for episode in episodes_to_purge:
1123 logger.debug('Episode removed from feed: %s (%s)',
1124 episode.title, episode.guid)
1125 gpodder.user_extensions.on_episode_removed_from_podcast(episode)
1126 self.db.delete_episode_by_guid(episode.guid, self.id)
1128 # Remove the episode from the "children" episodes list
1129 if self.children is not None:
1130 self.children.remove(episode)
1132 # This *might* cause episodes to be skipped if there were more than
1133 # max_episodes_per_feed items added to the feed between updates.
1134 # The benefit is that it prevents old episodes from appearing as new
1135 # in certain situations (see bug #340).
1136 self.db.purge(max_episodes, self.id) # TODO: Remove from self.children!
1138 # Sort episodes by pubdate, descending
1139 self.children.sort(key=lambda e: e.published, reverse=True)
1141 def update(self, max_episodes=0):
1142 max_episodes = int(max_episodes)
1143 try:
1144 result = self.feed_fetcher.fetch_channel(self, max_episodes)
1146 if result.status == feedcore.UPDATED_FEED:
1147 self._consume_updated_feed(result.feed, max_episodes)
1148 elif result.status == feedcore.NEW_LOCATION:
1149 url = result.feed
1150 logger.info('New feed location: %s => %s', self.url, url)
1151 if url in set(x.url for x in self.model.get_podcasts()):
1152 raise Exception('Already subscribed to ' + url)
1153 self.url = url
1154 # With the updated URL, fetch the feed again
1155 self.update(max_episodes)
1156 return
1157 elif result.status == feedcore.NOT_MODIFIED:
1158 pass
1160 self.save()
1161 except Exception as e:
1162 # "Not really" errors
1163 # feedcore.AuthenticationRequired
1164 # Temporary errors
1165 # feedcore.Offline
1166 # feedcore.BadRequest
1167 # feedcore.InternalServerError
1168 # feedcore.WifiLogin
1169 # Permanent errors
1170 # feedcore.Unsubscribe
1171 # feedcore.NotFound
1172 # feedcore.InvalidFeed
1173 # feedcore.UnknownStatusCode
1174 gpodder.user_extensions.on_podcast_update_failed(self, e)
1175 raise
1177 gpodder.user_extensions.on_podcast_updated(self)
1179 # Re-determine the common prefix for all episodes
1180 self._determine_common_prefix()
1182 self.db.commit()
1184 def delete(self):
1185 self.db.delete_podcast(self)
1186 self.model._remove_podcast(self)
1188 def save(self):
1189 if self.download_folder is None:
1190 self.get_save_dir()
1192 gpodder.user_extensions.on_podcast_save(self)
1194 self.db.save_podcast(self)
1195 self.model._append_podcast(self)
1197 def get_statistics(self):
1198 if self.id is None:
1199 return (0, 0, 0, 0, 0)
1200 else:
1201 return self.db.get_podcast_statistics(self.id)
1203 @property
1204 def group_by(self):
1205 if not self.section:
1206 self.section = self._get_content_type()
1207 self.save()
1209 return self.section
1211 def _get_content_type(self):
1212 if 'youtube.com' in self.url or 'vimeo.com' in self.url or 'escapistmagazine.com' in self.url:
1213 return _('Video')
1215 audio, video, other = 0, 0, 0
1216 for content_type in self.db.get_content_types(self.id):
1217 content_type = content_type.lower()
1218 if content_type.startswith('audio'):
1219 audio += 1
1220 elif content_type.startswith('video'):
1221 video += 1
1222 else:
1223 other += 1
1225 if audio >= video:
1226 return _('Audio')
1227 elif video > other:
1228 return _('Video')
1230 return _('Other')
1232 def authenticate_url(self, url):
1233 return util.url_add_authentication(url, self.auth_username, self.auth_password)
1235 def rename(self, new_title):
1236 new_title = new_title.strip()
1237 if self.title == new_title:
1238 return
1240 new_folder_name = self.find_unique_folder_name(new_title)
1241 if new_folder_name and new_folder_name != self.download_folder:
1242 new_folder = os.path.join(gpodder.downloads, new_folder_name)
1243 old_folder = os.path.join(gpodder.downloads, self.download_folder)
1244 if os.path.exists(old_folder):
1245 if not os.path.exists(new_folder):
1246 # Old folder exists, new folder does not -> simply rename
1247 logger.info('Renaming %s => %s', old_folder, new_folder)
1248 os.rename(old_folder, new_folder)
1249 else:
1250 # Both folders exist -> move files and delete old folder
1251 logger.info('Moving files from %s to %s', old_folder,
1252 new_folder)
1253 for file in glob.glob(os.path.join(old_folder, '*')):
1254 shutil.move(file, new_folder)
1255 logger.info('Removing %s', old_folder)
1256 shutil.rmtree(old_folder, ignore_errors=True)
1257 self.download_folder = new_folder_name
1259 self.title = new_title
1260 self.save()
1262 def _determine_common_prefix(self):
1263 # We need at least 2 episodes for the prefix to be "common" ;)
1264 if len(self.children) < 2:
1265 self._common_prefix = ''
1266 return
1268 prefix = os.path.commonprefix([x.title for x in self.children])
1269 # The common prefix must end with a space - otherwise it's not
1270 # on a word boundary, and we might end up chopping off too much
1271 if prefix and prefix[-1] != ' ':
1272 prefix = prefix[:prefix.rfind(' ') + 1]
1274 self._common_prefix = prefix
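# Example (hypothetical titles): ["Show 1: Intro", "Show 2: Next"] share the
# prefix "Show ", which trimmed_title will later strip; with fewer than two
# episodes the prefix stays empty.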
1276 def get_all_episodes(self):
1277 return self.children
1279 def get_episodes(self, state):
1280 return [e for e in self.get_all_episodes() if e.state == state]
1282 def find_unique_folder_name(self, download_folder):
1283 # Remove trailing dots to avoid errors on Windows (bug 600)
1284 # Also remove leading dots to avoid hidden folders on Linux
1285 download_folder = download_folder.strip('.' + string.whitespace)
1287 for folder_name in util.generate_names(download_folder):
1288 if (not self.db.podcast_download_folder_exists(folder_name) or
1289 self.download_folder == folder_name):
1290 return folder_name
1292 def get_save_dir(self, force_new=False):
1293 if self.download_folder is None or force_new:
1294 # we must change the folder name, because it has not been set manually
1295 fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)
1297 if not fn_template:
1298 fn_template = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
1300 # Find a unique folder name for this podcast
1301 download_folder = self.find_unique_folder_name(fn_template)
1303 # Try removing the download folder if it has been created previously
1304 if self.download_folder is not None:
1305 folder = os.path.join(gpodder.downloads, self.download_folder)
1306 try:
1307 os.rmdir(folder)
1308 except OSError:
1309 logger.info('Old download folder is kept for %s', self.url)
1311 logger.info('Updating download_folder of %s to %s', self.url,
1312 download_folder)
1313 self.download_folder = download_folder
1314 self.save()
1316 save_dir = os.path.join(gpodder.downloads, self.download_folder)
1318 # Create save_dir if it does not yet exist
1319 if not util.make_directory(save_dir):
1320 logger.error('Could not create save_dir: %s', save_dir)
1322 return save_dir
1324 save_dir = property(fget=get_save_dir)
1326 def remove_downloaded(self):
1327 # Remove the download directory
1328 for episode in self.get_episodes(gpodder.STATE_DOWNLOADED):
1329 filename = episode.local_filename(create=False, check_only=True)
1330 if filename is not None:
1331 gpodder.user_extensions.on_episode_delete(episode, filename)
1333 shutil.rmtree(self.save_dir, True)
1335 @property
1336 def cover_file(self):
1337 return os.path.join(self.save_dir, 'folder')
1340 class Model(object):
1341 PodcastClass = PodcastChannel
1343 def __init__(self, db):
1344 self.db = db
1345 self.children = None
1347 def _append_podcast(self, podcast):
1348 if podcast not in self.children:
1349 self.children.append(podcast)
1351 def _remove_podcast(self, podcast):
1352 self.children.remove(podcast)
1353 gpodder.user_extensions.on_podcast_delete(self)
1355 def get_podcasts(self):
1356 def podcast_factory(dct, db):
1357 return self.PodcastClass.create_from_dict(dct, self, dct['id'])
1359 if self.children is None:
1360 self.children = self.db.load_podcasts(podcast_factory)
1362 # Check download folders for changes (bug 902)
1363 for podcast in self.children:
1364 podcast.check_download_folder()
1366 return self.children
1368 def get_podcast(self, url):
1369 for p in self.get_podcasts():
1370 if p.url == url:
1371 return p
1372 return None
1374 def load_podcast(self, url, create=True, authentication_tokens=None,
1375 max_episodes=0):
1376 assert all(url != podcast.url for podcast in self.get_podcasts())
1377 return self.PodcastClass.load(self, url, create,
1378 authentication_tokens,
1379 max_episodes)
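# Illustrative use (assuming an existing Model instance named "model"):
#
#     podcast = model.load_podcast('http://example.com/feed.xml',
#                                  max_episodes=100)
#
# creates the channel, fetches and parses the feed once, and saves it
# (see PodcastChannel.load above for the details).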
1381 @classmethod
1382 def podcast_sort_key(cls, podcast):
1383 return cls.PodcastClass.sort_key(podcast)
1385 @classmethod
1386 def episode_sort_key(cls, episode):
1387 return episode.published
1389 @classmethod
1390 def sort_episodes_by_pubdate(cls, episodes, reverse=False):
1391 """Sort a list of PodcastEpisode objects chronologically
1393 Returns an iterable, sorted sequence of the episodes
1394 """
1395 return sorted(episodes, key=cls.episode_sort_key, reverse=reverse)
1398 def check_root_folder_path():
1399 root = gpodder.home
1400 if gpodder.ui.win32:
1401 longest = len(root) \
1402 + 1 + PodcastChannel.MAX_FOLDERNAME_LENGTH \
1403 + 1 + PodcastEpisode.MAX_FILENAME_WITH_EXT_LENGTH
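# Worked example (illustrative): MAX_FOLDERNAME_LENGTH is 60 and
# MAX_FILENAME_WITH_EXT_LENGTH is 132 (140 - len(".partial")), so a gPodder
# home path longer than 260 - 60 - 132 - 2 = 66 characters makes "longest"
# exceed the Windows MAX_PATH limit of 260 checked below.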
1404 if longest > 260:
1405 return _("Warning: path to gPodder home (%(root)s) is very long "
1406 "and can result in failure to download files.\n" % {"root": root}) \
1407 + _("You're advised to set it to a shorter path.")
1408 return None