Model: Fix cover_url setting
[gpodder.git] / src / gpodder / model.py
blobbf4f5bb19e75489a6c6ae9720a79618af2c80eb3
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2012 Thomas Perl and the gPodder Team
5 # Copyright (c) 2011 Neal H. Walfield
7 # gPodder is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # gPodder is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
23 # gpodder.model - Core model classes for gPodder (2009-08-13)
24 # Based on libpodcasts.py (thp, 2005-10-29)
27 import gpodder
28 from gpodder import util
29 from gpodder import feedcore
30 from gpodder import youtube
31 from gpodder import vimeo
32 from gpodder import schema
33 from gpodder import coverart
35 import logging
36 logger = logging.getLogger(__name__)
38 import os
39 import re
40 import glob
41 import shutil
42 import time
43 import datetime
45 import hashlib
46 import feedparser
47 import collections
48 import string
50 _ = gpodder.gettext
def get_payment_priority(url):
    """Return the sort priority of a payment provider URL

    flattr.com is the only payment provider that is preferred at the
    moment: URLs containing 'flattr.com' get priority 1, everything
    else gets priority 2 (the lower number sorts first).
    """
    return 1 if 'flattr.com' in url else 2
class CustomFeed(feedcore.ExceptionWithData):
    """Exception type derived from feedcore.ExceptionWithData

    NOTE(review): presumably carries a custom feed object as its data;
    confirm against the code that raises/catches it.
    """
class gPodderFetcher(feedcore.Fetcher):
    """
    feedcore Fetcher that is initialized with gpodder.user_agent and that
    lets registered custom handlers take over the fetching of a URL.
    """
    # Class-level list shared by all fetchers; filled through register()
    custom_handlers = []

    def __init__(self):
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        """Fetch the feed of "channel"

        Custom handlers are asked first (in registration order); the first
        one returning something other than None short-circuits the normal
        fetch and its result is wrapped as a CUSTOM_FEED result.
        """
        etag = channel.http_etag
        modified = feedparser._parse_date(channel.http_last_modified)

        # If we have a username or password, rebuild the url with them
        # included. A HTTPBasicAuthHandler would need to know the realm,
        # so embedding the credentials in the URL is the simpler option.
        url = channel.authenticate_url(channel.url)

        for handler in self.custom_handlers:
            custom_feed = handler.handle_url(url)
            if custom_feed is None:
                continue
            return feedcore.Result(feedcore.CUSTOM_FEED, custom_feed)

        return self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        """Pass "url" through the YouTube, then the Vimeo URL resolver"""
        return vimeo.get_real_channel_url(youtube.get_real_channel_url(url))

    @classmethod
    def register(cls, handler):
        """Append "handler" to the shared list of custom handlers"""
        cls.custom_handlers.append(handler)

# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register
99 # Our podcast model:
101 # database -> podcast -> episode -> download/playback
102 # podcast.parent == db
103 # podcast.children == [episode, ...]
104 # episode.parent == podcast
106 # - normally: episode.children = (None, None)
107 # - downloading: episode.children = (DownloadTask(), None)
108 # - playback: episode.children = (None, PlaybackTask())
class PodcastModelObject(object):
    """
    Generic base class of the podcast model objects. It offers a
    dictionary-based constructor and records attribute changes.
    """
    __slots__ = ('id', 'parent', 'children', 'changed')

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Instantiate the class with "args" as constructor arguments and
        copy every key/value pair of "d" onto the new object.
        """
        instance = cls(*args)

        # While "changed" is None, __setattr__ does not record anything
        instance.changed = None

        # XXX: all(map(lambda k: hasattr(o, k), d))?
        for key, value in d.iteritems():
            setattr(instance, key, value)

        # From here on, modifications are tracked
        instance.changed = {}

        return instance

    def __setattr__(self, name, value):
        """Track changes once "self.changed" is a dictionary

        For every modified attribute, the value it had before the first
        modification is kept in self.changed until _clear_changes is
        called. Objects without an id are not tracked.
        """
        tracking = (getattr(self, 'changed', None) is not None and
                self.id is not None)

        if tracking:
            previous = getattr(self, name, None)

            # A previous value of None means this is an initialization
            if previous is not None and value != previous:
                self.changed.setdefault(name, previous)

        super(PodcastModelObject, self).__setattr__(name, value)

    def _clear_changes(self):
        """Forget all recorded changes"""
        self.changed = {}
161 class PodcastEpisode(PodcastModelObject):
162 """holds data for one object in a channel"""
163 MAX_FILENAME_LENGTH = 200
165 __slots__ = schema.EpisodeColumns
def _deprecated(self, *args):
    """Accessor of removed properties - always raises an Exception

    Extra positional arguments are accepted and ignored, because the
    function is used as getter and as setter: a property setter is
    called with the assigned value as additional argument, which used
    to fail with a TypeError instead of the intended message.
    """
    raise Exception('Property is deprecated!')

is_played = property(fget=_deprecated, fset=_deprecated)
is_locked = property(fget=_deprecated, fset=_deprecated)
def has_website_link(self):
    """Check if the episode has a link worth showing as website

    A link that merely repeats the download URL does not count, unless
    youtube.is_video_link() accepts it.
    """
    if not self.link:
        return False

    if self.link != self.url:
        return True

    return youtube.is_video_link(self.link)
@classmethod
def from_feedparser_entry(cls, entry, channel):
    """Create an episode for "channel" from a feedparser entry

    The download URL is taken from the first usable source, tried in
    this order: enclosures, Media RSS content, and finally any link of
    the entry that is a YouTube/Vimeo video link or that resolves to a
    known file type.

    Returns: The new episode, or None if the entry does not contain
             anything usable as download URL
    """
    episode = cls(channel)
    episode.guid = entry.get('id', '')

    # Replace multi-space and newlines with single space (Maemo bug 11173)
    episode.title = re.sub(r'\s+', ' ', entry.get('title', ''))
    episode.link = entry.get('link', '')
    if 'content' in entry and len(entry['content']) and \
            entry['content'][0].get('type', '') == 'text/html':
        episode.description = entry['content'][0].value
    else:
        episode.description = entry.get('summary', '')

    # Fallback to subtitle if summary is not available
    if not episode.description:
        episode.description = entry.get('subtitle', '')

    try:
        total_time = 0

        # Parse iTunes-specific podcast duration metadata
        itunes_duration = entry.get('itunes_duration', '')
        if itunes_duration:
            total_time = util.parse_time(itunes_duration)

        # Parse time from YouTube descriptions if it's a YouTube feed
        if youtube.is_youtube_guid(episode.guid):
            result = re.search(r'Time:<[^>]*>\n<[^>]*>([:0-9]*)<',
                    episode.description)
            if result:
                youtube_duration = result.group(1)
                total_time = util.parse_time(youtube_duration)

        episode.total_time = total_time
    except Exception:
        # The duration is optional: keep the default on malformed data,
        # but leave a trace instead of hiding the problem completely
        logger.debug('Cannot parse duration of episode: %s',
                episode.guid, exc_info=True)

    episode.published = feedcore.get_pubdate(entry)

    enclosures = entry.get('enclosures', [])
    media_rss_content = entry.get('media_content', [])
    candidates = enclosures + media_rss_content
    audio_available = any(e.get('type', '').startswith('audio/')
            for e in candidates)
    video_available = any(e.get('type', '').startswith('video/')
            for e in candidates)
    media_available = audio_available or video_available

    # XXX: Make it possible for hooks/extensions to override this by
    # giving them a list of enclosures and the "self" object (podcast)
    # and letting them sort and/or filter the list of enclosures to
    # get the desired enclosure picked by the algorithm below.
    filter_and_sort_enclosures = lambda x: x

    # Read the payment URLs (e.g. flattr auto-url), if any. Links
    # without "rel" or "href" are skipped instead of raising KeyError.
    payment_info = [link['href'] for link in entry.get('links', [])
            if link.get('rel') == 'payment' and 'href' in link]
    if payment_info:
        episode.payment_url = sorted(payment_info,
                key=get_payment_priority)[0]

    # Enclosures
    for e in filter_and_sort_enclosures(enclosures):
        episode.mime_type = e.get('type', 'application/octet-stream')
        if episode.mime_type == '':
            # See Maemo bug 10036
            logger.warn('Fixing empty mimetype in ugly feed')
            episode.mime_type = 'application/octet-stream'

        if '/' not in episode.mime_type:
            continue

        # Skip images in feeds if audio or video is available (bug 979)
        # This must (and does) also look in Media RSS enclosures (bug 1430)
        if episode.mime_type.startswith('image/') and media_available:
            continue

        # If we have audio or video available later on, skip
        # 'application/octet-stream' data types (fixes Linux Outlaws)
        if episode.mime_type == 'application/octet-stream' and \
                media_available:
            continue

        episode.url = util.normalize_feed_url(e.get('href', ''))
        if not episode.url:
            continue

        try:
            episode.file_size = int(e.length) or -1
        except Exception:
            # Missing or non-numeric length: size is unknown
            episode.file_size = -1

        return episode

    # Media RSS content
    for m in filter_and_sort_enclosures(media_rss_content):
        episode.mime_type = m.get('type', 'application/octet-stream')
        if '/' not in episode.mime_type:
            continue

        # Skip images in Media RSS if we have audio/video (bug 1444)
        if episode.mime_type.startswith('image/') and media_available:
            continue

        episode.url = util.normalize_feed_url(m.get('url', ''))
        if not episode.url:
            continue

        try:
            episode.file_size = int(m.get('filesize', 0)) or -1
        except Exception:
            episode.file_size = -1

        try:
            episode.total_time = int(m.get('duration', 0)) or 0
        except Exception:
            episode.total_time = 0

        return episode

    # Brute-force detection of any links
    for link in entry.get('links', ()):
        episode.url = util.normalize_feed_url(link.get('href', ''))
        if not episode.url:
            continue

        if (youtube.is_video_link(episode.url) or
                vimeo.is_video_link(episode.url)):
            return episode

        # Check if we can resolve this link to a audio/video file
        filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)
        if file_type is None and hasattr(link, 'type'):
            extension = util.extension_from_mimetype(link.type)
            file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

    return None
def __init__(self, channel):
    """Create an empty, not yet saved episode that belongs to "channel" """
    # Position in the model tree: parent podcast, no download or
    # playback task attached
    self.parent = channel
    self.podcast_id = channel.id
    self.children = (None, None)

    # Feed metadata (id stays None until the episode is saved)
    self.id = None
    self.url = ''
    self.title = ''
    self.file_size = 0
    self.mime_type = 'application/octet-stream'
    self.guid = ''
    self.description = ''
    self.link = ''
    self.published = 0
    self.download_filename = None
    self.payment_url = None

    # State flags; the archive flag is inherited from the podcast
    self.state = gpodder.STATE_NORMAL
    self.is_new = True
    self.archive = channel.auto_archive_episodes

    # Time attributes
    self.total_time = 0
    self.current_position = 0
    self.current_position_updated = 0

    # Timestamp of last playback time
    self.last_playback = 0
@property
def channel(self):
    """The podcast this episode belongs to (alias for self.parent)"""
    return self.parent

@property
def db(self):
    """The database object, reached through the podcast's own parent"""
    podcast = self.parent
    return podcast.parent.db
@property
def trimmed_title(self):
    """Return the title with the common prefix trimmed"""
    # A trimmed title must keep MORE than this many characters, which
    # avoids things like "Common prefix 123" to become just "123".
    # Otherwise the next rule is tried, or the title is returned as-is.
    LEFTOVER_MIN = 5

    title = self.title
    podcast_title = self.parent.title

    # "Podcast Name - Title" and "Podcast Name: Title" -> "Title"
    for separator in (' - ', ': '):
        prefix = podcast_title + separator
        if not title.startswith(prefix):
            continue
        if len(title) - len(prefix) > LEFTOVER_MIN:
            return title[len(prefix):]

    patterns = (
        # "Podcast Name <number>: ..." -> "<number>: ..."
        r'^%s (\d+: .*)' % re.escape(podcast_title),

        # "Episode <number>: ..." -> "<number>: ..."
        r'Episode (\d+:.*)',
    )

    for pattern in patterns:
        if not re.match(pattern, title):
            continue
        shortened = re.sub(pattern, r'\1', title)
        if len(shortened) > LEFTOVER_MIN:
            return shortened

    common_prefix = self.parent._common_prefix

    # "#001: Title" -> "001: Title"
    if (not common_prefix and re.match(r'^#\d+: ', title) and
            len(title) - 1 > LEFTOVER_MIN):
        return title[1:]

    if (common_prefix is not None and
            title.startswith(common_prefix) and
            len(title) - len(common_prefix) > LEFTOVER_MIN):
        return title[len(common_prefix):]

    return title
def _set_download_task(self, download_task):
    """Store the task in the first slot of self.children"""
    second_slot = self.children[1]
    self.children = (download_task, second_slot)

def _get_download_task(self):
    """Return the first slot of self.children"""
    return self.children[0]

download_task = property(_get_download_task, _set_download_task)
@property
def downloading(self):
    """True if a download task exists that is downloading, queued or paused"""
    task = self.download_task
    return task is not None and \
            task.status in (task.DOWNLOADING, task.QUEUED, task.PAUSED)
def check_is_new(self):
    """Check for an episode in normal state that is flagged as new and
    that is not being downloaded at the moment"""
    in_normal_state = (self.state == gpodder.STATE_NORMAL)
    return in_normal_state and self.is_new and not self.downloading
def save(self):
    """Write the episode to the database

    The on_episode_save extension hook is called first, then the list
    of recorded changes is reset and the episode is handed to the
    database.
    """
    gpodder.user_extensions.on_episode_save(self)
    self._clear_changes()
    self.db.save_episode(self)
def on_downloaded(self, filename):
    """Mark the episode as downloaded (and new) and save it

    The file size is updated from the size of "filename" on disk.
    """
    self.state = gpodder.STATE_DOWNLOADED
    self.is_new = True

    size_on_disk = os.path.getsize(filename)
    self.file_size = size_on_disk

    self.save()
def set_state(self, state):
    """Change the state of the episode and save it right away"""
    self.state = state
    self.save()
def playback_mark(self):
    """Clear the "new" flag, remember the current time as time of the
    last playback (in whole seconds) and save the episode"""
    now = int(time.time())
    self.is_new = False
    self.last_playback = now
    self.save()
def mark(self, state=None, is_played=None, is_locked=None):
    """Update state, played and archive flag in one go and save

    Parameters that are None leave the corresponding attribute alone.
    """
    if state is not None:
        self.state = state

    if is_played is not None:
        self.is_new = not is_played

        # "Mark as new" must "undelete" the episode
        undelete = self.is_new and self.state == gpodder.STATE_DELETED
        if undelete:
            self.state = gpodder.STATE_NORMAL

    if is_locked is not None:
        self.archive = is_locked

    self.save()
def age_in_days(self):
    """Age of the downloaded file, as computed by util.file_age_in_days

    The filename is only looked up (check_only=True) and passed on
    as-is, even if it is None.
    """
    filename = self.local_filename(create=False, check_only=True)
    return util.file_age_in_days(filename)

age_int_prop = property(fget=age_in_days)
def get_age_string(self):
    """Age of the downloaded file, formatted by util.file_age_to_string"""
    days = self.age_in_days()
    return util.file_age_to_string(days)

age_prop = property(fget=get_age_string)
@property
def description_html(self):
    """The description as HTML

    A description containing "<" is assumed to be HTML already and is
    returned unchanged; otherwise newlines are turned into <br>.
    """
    text = self.description

    # XXX: That's not a very well-informed heuristic to check
    # if the description already contains HTML. Better ideas?
    looks_like_html = '<' in text

    return text if looks_like_html else text.replace('\n', '<br>')
def one_line_description(self):
    """Return the description as a single line of plain text

    HTML tags are removed, whitespace runs are collapsed, and texts
    longer than 120 characters are cut off and get '...' appended.
    A placeholder text is returned if nothing remains.
    """
    MAX_LINE_LENGTH = 120

    text = util.remove_html_tags(self.description or '')
    text = re.sub(r'\s+', ' ', text).strip()
    if not text:
        return _('No description available')

    # Decode the description to avoid gPodder bug 1277
    text = util.convert_bytes(text).strip()

    if len(text) <= MAX_LINE_LENGTH:
        return text

    return text[:MAX_LINE_LENGTH] + '...'
def delete_from_disk(self):
    """Delete the downloaded file and mark the episode as deleted

    The on_episode_delete extension hook is called before the file is
    removed. Without a local filename, only the state is changed.
    """
    filename = self.local_filename(create=False, check_only=True)
    has_file = filename is not None

    if has_file:
        gpodder.user_extensions.on_episode_delete(self, filename)
        util.delete_file(filename)

    self.set_state(gpodder.STATE_DELETED)
def get_playback_url(self, fmt_ids=None, allow_partial=False):
    """Local (or remote) playback/streaming filename/URL

    Returns the local filename if that file exists. With
    allow_partial=True, an existing "<filename>.partial" file is
    preferred (preview playback of unfinished downloads). In all
    other cases, the episode URL is returned after passing it
    through the YouTube and Vimeo download URL resolvers.
    """
    local = self.local_filename(create=False)

    if local is not None:
        partial = local + '.partial'
        if allow_partial and os.path.exists(partial):
            return partial

        if os.path.exists(local):
            return local

    # Nothing usable on disk - stream from the network
    remote = youtube.get_real_download_url(self.url, fmt_ids)
    return vimeo.get_real_download_url(remote)
def find_unique_file_name(self, filename, extension):
    """Pick the first generated name that is free for this episode

    Candidates come from util.generate_names(); a candidate is accepted
    if no episode of the podcast uses it according to the database, or
    if it is the filename this episode has already.
    """
    # Remove leading and trailing whitespace + dots (to avoid hidden files)
    stripped = filename.strip('.' + string.whitespace)
    candidate = stripped + extension

    for name in util.generate_names(candidate):
        taken = self.db.episode_filename_exists(self.podcast_id, name)
        if not taken or self.download_filename == name:
            return name

    return None
def local_filename(self, create, force_update=False, check_only=False,
        template=None, return_wanted_filename=False):
    """Get (and possibly generate) the local saving filename

    create: Pass True if a new filename should be generated when the
    episode has none yet. Only do this when the file is about to be
    created/downloaded. With create=False (the normal case), None is
    returned for episodes without a filename.

    force_update: Pass True to search for a new (better) filename and
    to move an existing file there. Useful when more information is
    known during the download (e.g. the mimetype).

    check_only: Pass True to never generate or rename anything. Use
    this if you only want to check if a file exists.

    template: A filename to be used as template for generating the
    "real" filename; its extension is trusted.

    return_wanted_filename: Pass True to get the generated filename
    (without directory) back without storing it in the database (used
    by the "import external downloads" feature).

    Unless return_wanted_filename applies, the generated filename is
    stored in the database and the full path is returned.
    """
    may_generate = create and not check_only
    if self.download_filename is None and not may_generate:
        return None

    ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

    if not check_only and (force_update or not self.download_filename):
        # Avoid and catch gPodder bug 1440 and similar situations
        if template == '':
            logger.warn('Empty template. Report this podcast URL %s',
                    self.channel.url)
            template = None

        # Try to find a new filename for the current file
        if template is None:
            url_basename, _unused = util.filename_from_url(self.url)
        else:
            # If template is specified, trust the template's extension
            url_basename, ext = os.path.splitext(template)
        name_template = util.sanitize_filename(url_basename,
                self.MAX_FILENAME_LENGTH)

        if 'redirect' in name_template and template is None:
            # This looks like a redirection URL - force URL resolving!
            logger.warn('Looks like a redirection to me: %s', self.url)
            url = util.get_real_url(self.channel.authenticate_url(self.url))
            logger.info('Redirection resolved to: %s', url)
            url_basename, _unused = util.filename_from_url(url)
            name_template = util.sanitize_filename(url_basename,
                    self.MAX_FILENAME_LENGTH)

        # Use title for YouTube downloads and Soundcloud streams
        if youtube.is_video_link(self.url) or name_template == 'stream':
            sanitized = util.sanitize_filename(self.title,
                    self.MAX_FILENAME_LENGTH)
            if sanitized:
                name_template = sanitized

        # If the basename is empty, use the md5 hexdigest of the URL
        if not name_template or name_template.startswith('redirect.'):
            logger.error('Report this feed: Podcast %s, episode %s',
                    self.channel.url, self.url)
            name_template = hashlib.md5(self.url).hexdigest()

        # Find a unique filename for this episode
        wanted = self.find_unique_file_name(name_template, ext)

        if return_wanted_filename:
            # return the calculated filename without updating the database
            return wanted

        if self.download_filename and wanted != self.download_filename:
            # The old file exists, but we have decided to want a
            # different filename: there might be an old download
            # crawling around - move it!
            save_dir = self.channel.save_dir
            new_path = os.path.join(save_dir, wanted)
            old_path = os.path.join(save_dir, self.download_filename)
            old_exists = os.path.exists(old_path)

            if old_exists and not os.path.exists(new_path):
                logger.info('Renaming %s => %s', old_path, new_path)
                os.rename(old_path, new_path)
            elif force_update and not old_exists:
                # When we call force_update, the file might not yet exist
                # when we call it from the downloading code before saving
                # the file
                logger.info('Choosing new filename: %s', new_path)
            else:
                logger.warn('%s exists or %s does not', new_path, old_path)

            logger.info('Updating filename of %s to "%s".', self.url, wanted)
        elif self.download_filename is None:
            logger.info('Setting download filename: %s', wanted)

        self.download_filename = wanted
        self.save()

    directory = util.sanitize_encoding(self.channel.save_dir)
    return os.path.join(directory,
            util.sanitize_encoding(self.download_filename))
def set_mimetype(self, mimetype, commit=False):
    """Sets the mimetype for this episode

    With commit=True, the database is asked to commit afterwards.
    """
    self.mime_type = mimetype

    if not commit:
        return

    self.db.commit()
def extension(self, may_call_local_filename=True):
    """Determine the file extension of the episode

    The extension of the URL is the starting point. If allowed and a
    local filename is known, the extension of the local file is used
    instead. An empty extension, or one without a known file type,
    is replaced by the extension belonging to the mime type.
    """
    _basename, ext = util.filename_from_url(self.url)

    if may_call_local_filename:
        local = self.local_filename(create=False)
        if local is not None:
            ext = os.path.splitext(local)[1]

    # if we can't detect the extension from the url fallback on the mimetype
    if ext == '' or util.file_type_by_extension(ext) is None:
        return util.extension_from_mimetype(self.mime_type)

    return ext
def mark_new(self):
    """Set the "new" flag and save the episode"""
    self.is_new = True
    self.save()

def mark_old(self):
    """Clear the "new" flag and save the episode"""
    self.is_new = False
    self.save()
def file_exists(self):
    """Check if the episode has a local filename that exists on disk"""
    filename = self.local_filename(create=False, check_only=True)
    return filename is not None and os.path.exists(filename)
def was_downloaded(self, and_exists=False):
    """Check if the episode is in the "downloaded" state

    With and_exists=True, the downloaded file must also exist.
    """
    if self.state != gpodder.STATE_DOWNLOADED:
        return False

    file_missing = and_exists and not self.file_exists()
    return not file_missing
def sync_filename(self, use_custom=False, custom_format=None):
    """Return the name to use for this episode when syncing

    This is the episode title, unless use_custom is set: then
    "custom_format" is filled in by util.object_string_formatter with
    the episode and its podcast as objects.
    """
    if not use_custom:
        return self.title

    return util.object_string_formatter(custom_format,
            episode=self, podcast=self.channel)
def file_type(self):
    """Return the file type of the episode

    YouTube and Vimeo video links always count as 'video'; everything
    else is decided by util.file_type_by_extension().
    """
    # Assume all YouTube/Vimeo links are video files
    is_video_site = (youtube.is_video_link(self.url) or
            vimeo.is_video_link(self.url))
    if is_video_site:
        return 'video'

    return util.file_type_by_extension(self.extension())
@property
def basename(self):
    """Last path component of the episode URL, without its extension"""
    last_component = os.path.basename(self.url)
    stem, _extension = os.path.splitext(last_component)
    return stem
@property
def pubtime(self):
    """
    Returns published time as HHMM (or 0000 if not available)
    """
    try:
        published = datetime.datetime.fromtimestamp(self.published)
        return published.strftime('%H%M')
    except Exception:
        # Unusable timestamps (e.g. None or out of range) are logged and
        # replaced by the fallback. "except Exception" (instead of a bare
        # except) lets KeyboardInterrupt and SystemExit pass through.
        logger.warn('Cannot format pubtime: %s', self.title, exc_info=True)
        return '0000'
def playlist_title(self):
    """Return a title for this episode in a playlist

    The result has the form "<podcast title> - <episode title>
    (<publication date>)" and is the canonical representation of this
    episode in playlists (for example, M3U playlists).
    """
    podcast_title = self.channel.title
    episode_title = self.title
    published = self.cute_pubdate()

    return '%s - %s (%s)' % (podcast_title, episode_title, published)
def cute_pubdate(self):
    """Publication date as formatted by util.format_date

    If util.format_date returns None, a translated "(unknown)"
    placeholder is returned instead.
    """
    formatted = util.format_date(self.published)

    if formatted is not None:
        return formatted

    return '(%s)' % _('unknown')

pubdate_prop = property(fget=cute_pubdate)
@property
def sortdate(self):
    """Publication date (local time) as sortable YYYY-MM-DD string"""
    published = datetime.datetime.fromtimestamp(self.published)

    # '%F' is not among the portable strftime directives and is not
    # available on every platform; spell out its meaning instead
    return published.strftime('%Y-%m-%d')
def calculate_filesize(self):
    """Update self.file_size from the size of the local file

    Nothing happens for episodes without a local filename. If the size
    cannot be determined, the error is logged and the old value stays.
    """
    filename = self.local_filename(create=False)
    if filename is None:
        return

    try:
        self.file_size = os.path.getsize(filename)
    except Exception:
        # Best effort only. "except Exception" (instead of a bare
        # except) lets KeyboardInterrupt and SystemExit pass through.
        logger.error('Could not get file size: %s', filename, exc_info=True)
def is_finished(self):
    """Return True if this episode is considered "finished playing"

    This requires a current position and a total time greater than
    zero, with the position being either within the last 10 seconds
    of the track or at 99 percent (or more) of the total time.
    """
    position = self.current_position
    total = self.total_time

    if not (position > 0 and total > 0):
        return False

    in_last_seconds = position + 10 >= total
    return in_last_seconds or position >= total*.99
def get_play_info_string(self, duration_only=False):
    """Describe position and duration of the episode as text

    Returns "<position> / <duration>" for a started episode whose
    position differs from the total time, the duration alone if only
    that is known (or wanted via duration_only), and '-' otherwise.
    """
    duration = util.format_time(self.total_time)
    has_duration = self.total_time > 0

    if duration_only and has_duration:
        return duration

    started = (self.current_position > 0 and
            self.current_position != self.total_time)
    if started:
        position = util.format_time(self.current_position)
        return '%s / %s' % (position, duration)

    return duration if has_duration else '-'
def update_from(self, episode):
    """Copy the feed-provided attributes of "episode" to this episode"""
    copied_attributes = ('title', 'url', 'description', 'link',
            'published', 'guid', 'file_size', 'payment_url')

    for attribute in copied_attributes:
        value = getattr(episode, attribute)
        setattr(self, attribute, value)
764 class PodcastChannel(PodcastModelObject):
765 __slots__ = schema.PodcastColumns + ('_common_prefix',)
767 UNICODE_TRANSLATE = {ord(u'ö'): u'o', ord(u'ä'): u'a', ord(u'ü'): u'u'}
769 # Enumerations for download strategy
770 STRATEGY_DEFAULT, STRATEGY_LATEST = range(2)
772 # Description and ordering of strategies
773 STRATEGIES = [
774 (STRATEGY_DEFAULT, _('Default')),
775 (STRATEGY_LATEST, _('Only keep latest')),
778 MAX_FOLDERNAME_LENGTH = 60
779 SECONDS_PER_WEEK = 7*24*60*60
780 EpisodeClass = PodcastEpisode
782 feed_fetcher = gPodderFetcher()
def __init__(self, model):
    """Create an empty, not yet saved podcast that belongs to "model" """
    # Position in the model tree (no child list assigned yet)
    self.parent = model
    self.children = None

    # Feed metadata (id stays None until the podcast is saved)
    self.id = None
    self.url = None
    self.title = ''
    self.link = ''
    self.description = ''
    self.cover_url = None
    self.payment_url = None

    # Credentials for feeds that require authentication
    self.auth_username = ''
    self.auth_password = ''

    # HTTP caching headers
    self.http_last_modified = None
    self.http_etag = None

    # Per-podcast settings
    self.auto_archive_episodes = False
    self.download_folder = None
    self.pause_subscription = False
    self.sync_to_mp3_player = True

    self.section = _('Other')
    self._common_prefix = None
    self.download_strategy = PodcastChannel.STRATEGY_DEFAULT
@property
def model(self):
    """The model this podcast belongs to (alias for self.parent)"""
    return self.parent

@property
def db(self):
    """The database object of the model"""
    owner = self.parent
    return owner.db
def get_download_strategies(self):
    """Yield an (is_active, value, caption) tuple for every entry of
    PodcastChannel.STRATEGIES, where is_active tells if the entry is
    the current strategy of this podcast"""
    for value, caption in PodcastChannel.STRATEGIES:
        is_active = (self.download_strategy == value)
        yield (is_active, value, caption)
def set_download_strategy(self, download_strategy):
    """Change the download strategy of this podcast

    Values that do not appear in self.STRATEGIES are rejected with a
    warning; setting the current value again is a no-op.
    """
    if download_strategy == self.download_strategy:
        return

    caption = dict(self.STRATEGIES).get(download_strategy)
    if caption is None:
        # %r instead of %d: the rejected value is not necessarily an
        # integer, and %d would then break the formatting of the very
        # message that is supposed to report the bad value
        logger.warning('Cannot set strategy to %r', download_strategy)
        return

    logger.debug('Strategy for %s changed to %s', self.title, caption)
    self.download_strategy = download_strategy
def check_download_folder(self):
    """Check the download folder for externally-downloaded files

    This will try to assign downloaded files with episodes in the
    database.

    This will also cause missing files to be marked as deleted.
    """
    known_files = set()

    for episode in self.get_downloaded_episodes():
        if episode.was_downloaded():
            filename = episode.local_filename(create=False)
            # An episode without filename cannot have a file either;
            # os.path.exists() must not be called with None
            if filename is None or not os.path.exists(filename):
                # File has been deleted by the user - simulate a
                # delete event (also marks the episode as deleted)
                logger.debug('Episode deleted: %s', filename)
                episode.delete_from_disk()
                continue

            known_files.add(filename)

    existing_files = set(filename for filename in
            glob.glob(os.path.join(self.save_dir, '*'))
            if not filename.endswith('.partial'))

    # Cover art files in the download folder are not downloads
    ignore_files = ['folder'+ext for ext in
            coverart.CoverDownloader.EXTENSIONS]

    external_files = existing_files.difference(list(known_files) +
            [os.path.join(self.save_dir, ignore_file)
             for ignore_file in ignore_files])
    if not external_files:
        return

    all_episodes = self.get_all_episodes()

    for filename in external_files:
        basename = os.path.basename(filename)

        # Prefer an episode that has this filename assigned already
        existing = [e for e in all_episodes
                if e.download_filename == basename]
        if existing:
            logger.info('Importing external download: %s', filename)
            existing[0].on_downloaded(filename)
            continue

        # Otherwise, look for an episode that would get this filename
        # (does not depend on the episode, so split only once)
        target_base, target_ext = os.path.splitext(basename)
        found = False

        for episode in all_episodes:
            wanted_filename = episode.local_filename(create=True,
                    return_wanted_filename=True)

            matches = (basename == wanted_filename)
            if not matches:
                wanted_base, wanted_ext = os.path.splitext(wanted_filename)
                if wanted_base == target_base:
                    # Filenames only differ by the extension
                    wanted_type = util.file_type_by_extension(wanted_ext)
                    target_type = util.file_type_by_extension(target_ext)

                    # If wanted type is None, assume that we don't know
                    # the right extension before the download (e.g.
                    # YouTube); if the wanted type is the same as the
                    # target type, assume that it's the correct file
                    matches = (wanted_type is None or
                            wanted_type == target_type)

            if matches:
                logger.info('Importing external download: %s', filename)
                episode.download_filename = basename
                episode.on_downloaded(filename)
                found = True
                break

        if not found and not util.is_system_file(filename):
            logger.warn('Unknown external file: %s', filename)
@classmethod
def sort_key(cls, podcast):
    """Return the key by which "podcast" is sorted

    The key is the lowercased title (passed through util.convert_bytes)
    without a leading "the " and with the characters listed in
    UNICODE_TRANSLATE replaced.
    """
    lowered = util.convert_bytes(podcast.title.lower())
    without_article = re.sub('^the ', '', lowered)
    return without_article.translate(cls.UNICODE_TRANSLATE)
@classmethod
def load(cls, model, url, create=True, authentication_tokens=None,
        max_episodes=0):
    """Return the podcast for "url", subscribing to it if necessary

    A podcast of "model" that has this URL already is returned as-is.
    Otherwise (and only with create=True) a new podcast is saved,
    updated from its feed and set up. If the first update fails, the
    new podcast is removed again and the exception is re-raised.

    authentication_tokens: Optional (username, password) sequence
    max_episodes: Passed on to update()

    Returns: The podcast, or None if it is unknown and create is false
    """
    if isinstance(url, unicode):
        url = url.encode('utf-8')

    for podcast in model.get_podcasts():
        if podcast.url == url:
            return podcast

    if not create:
        return None

    tmp = cls(model)
    tmp.url = url
    if authentication_tokens is not None:
        tmp.auth_username = authentication_tokens[0]
        tmp.auth_password = authentication_tokens[1]

    # Save podcast, so it gets an ID assigned before
    # updating the feed and adding saving episodes
    tmp.save()

    try:
        tmp.update(max_episodes)
    except Exception:
        logger.debug('Fetch failed. Removing buggy feed.')
        tmp.remove_downloaded()
        tmp.delete()
        raise

    # Determine the section in which this podcast should appear
    tmp.section = tmp._get_content_type()

    # Determine a new download folder now that we have the title
    tmp.get_save_dir(force_new=True)

    # Mark episodes as downloaded if files already exist (bug 902)
    tmp.check_download_folder()

    # Determine common prefix of episode titles
    tmp._determine_common_prefix()

    tmp.save()

    gpodder.user_extensions.on_podcast_subscribe(tmp)

    return tmp
def episode_factory(self, d):
    """
    Build an episode from the key-value pairs of the dictionary "d".
    The episode is created by self.EpisodeClass with this podcast
    as constructor argument.

    Returns: The object returned by EpisodeClass.create_from_dict
    """
    episode_class = self.EpisodeClass
    return episode_class.create_from_dict(d, self)
def _consume_updated_title(self, new_title):
    """Take over the title supplied by the feed, if appropriate

    The current title is only replaced when it is empty or equal to
    the feed URL. Afterwards, the YouTube and Vimeo title prefixes
    are turned into an " on YouTube" / " on Vimeo" suffix.
    """
    # Replace multi-space and newlines with single space (Maemo bug 11173)
    new_title = re.sub(r'\s+', ' ', new_title).strip()

    # Only update the podcast-supplied title when we
    # don't yet have a title, or if the title is the
    # feed URL (e.g. we didn't find a title before).
    if not self.title or self.title == self.url:
        self.title = new_title

    # YouTube- and Vimeo-specific title fix (first match wins)
    site_prefixes = (
        ('Uploads by ', ' on YouTube'),
        ('Vimeo / ', ' on Vimeo'),
    )
    for prefix, suffix in site_prefixes:
        if self.title.startswith(prefix):
            self.title = self.title[len(prefix):] + suffix
            break
def _consume_metadata(self, title, link, description, cover_url,
        payment_url):
    """Store the podcast metadata found in a feed and save the podcast

    The title goes through _consume_updated_title (which decides if it
    is used); all other values replace the current ones.
    """
    self._consume_updated_title(title)

    self.link, self.description = link, description
    self.cover_url, self.payment_url = cover_url, payment_url

    self.save()
def _consume_custom_feed(self, custom_feed, max_episodes=0):
    """
    Update this podcast from a custom feed object.

    custom_feed has to provide get_title(), get_link(),
    get_description(), get_image() and get_new_episodes().
    max_episodes is passed on to remove_unreachable_episodes().
    """
    title = custom_feed.get_title()
    link = custom_feed.get_link()
    description = custom_feed.get_description()
    image = custom_feed.get_image()

    # No payment URL is passed on for custom feeds
    self._consume_metadata(title, link, description, image, None)

    known_episodes = self.get_all_episodes()
    known_guids = []
    for known in known_episodes:
        known_guids.append(known.guid)

    # Insert newly-found episodes into the database + local cache
    result = custom_feed.get_new_episodes(self, known_guids)
    fresh_episodes, seen_guids = result
    self.children.extend(fresh_episodes)

    self.remove_unreachable_episodes(known_episodes, seen_guids,
            max_episodes)
def _consume_updated_feed(self, feed, max_episodes=0):
    """
    Update this podcast and its episodes from a parsed feed.

    feed is the parsed feed object; its "feed" attribute supplies the
    podcast metadata and its "entries" attribute the episodes.
    If max_episodes is greater than zero, only the max_episodes most
    recent entries (by feedcore.get_pubdate) are looked at.

    Existing episodes (matched by GUID) are updated in place, unknown
    ones are saved and appended to self.children. At the end,
    remove_unreachable_episodes() is called with the GUIDs seen.
    """
    # Cover art URL - start out with "no cover", so that a feed image
    # without "href" and "url" does not leave cover_url undefined
    cover_url = None
    if hasattr(feed.feed, 'image'):
        # If both attributes are set, the last one ('url') wins
        for attribute in ('href', 'url'):
            new_value = getattr(feed.feed.image, attribute, None)
            if new_value is not None:
                cover_url = new_value
    elif hasattr(feed.feed, 'icon'):
        cover_url = feed.feed.icon

    # Payment URL (Flattr auto-payment) information
    # Links without "rel" or "href" are ignored instead of raising
    payment_info = [link['href'] for link in feed.feed.get('links', [])
            if link.get('rel') == 'payment' and 'href' in link]
    if payment_info:
        payment_url = sorted(payment_info, key=get_payment_priority)[0]
    else:
        payment_url = None

    self._consume_metadata(feed.feed.get('title', self.url),
            feed.feed.get('link', self.link),
            feed.feed.get('subtitle', self.description),
            cover_url,
            payment_url)

    # Load all episodes to update them properly.
    existing = self.get_all_episodes()

    # We have to sort the entries in descending chronological order,
    # because if the feed lists items in ascending order and has >
    # max_episodes old episodes, new episodes will not be shown.
    # See also: gPodder Bug 1186
    entries = sorted(feed.entries, key=feedcore.get_pubdate, reverse=True)

    # We can limit the maximum number of entries that gPodder will parse
    if max_episodes > 0 and len(entries) > max_episodes:
        entries = entries[:max_episodes]

    # GUID-based existing episode list
    existing_guids = dict((e.guid, e) for e in existing)

    # Get most recent published of all episodes
    last_published = self.db.get_last_published(self) or 0

    # Keep track of episode GUIDs currently seen in the feed
    seen_guids = set()

    # Number of new episodes found
    new_episodes = 0

    # Search all entries for new episodes
    for entry in entries:
        episode = self.EpisodeClass.from_feedparser_entry(entry, self)
        if episode is None:
            # No episode could be created from this entry
            continue

        if not episode.title:
            logger.warning('Using filename as title for %s', episode.url)
            basename = os.path.basename(episode.url)
            episode.title = os.path.splitext(basename)[0]

        # Maemo bug 12073
        if not episode.guid:
            logger.warning('Using download URL as GUID for %s',
                    episode.title)
            episode.guid = episode.url

        seen_guids.add(episode.guid)

        # Detect (and update) existing episode based on GUIDs
        existing_episode = existing_guids.get(episode.guid, None)
        if existing_episode:
            existing_episode.update_from(episode)
            existing_episode.save()
            continue

        # Workaround for bug 340: If the episode has been
        # published earlier than one week before the most
        # recent existing episode, do not mark it as new.
        if episode.published < last_published - self.SECONDS_PER_WEEK:
            logger.debug('Episode with old date: %s', episode.title)
            episode.is_new = False

        if episode.is_new:
            new_episodes += 1

            # Only allow a certain number of new episodes per update
            if (self.download_strategy == PodcastChannel.STRATEGY_LATEST and
                    new_episodes > 1):
                episode.is_new = False

        episode.save()
        self.children.append(episode)

    self.remove_unreachable_episodes(existing, seen_guids, max_episodes)
def remove_unreachable_episodes(self, existing, seen_guids, max_episodes):
    """
    Drop episodes that are no longer listed in the feed.

    existing is the list of episodes known before the update,
    seen_guids contains the GUIDs found in the feed, and
    max_episodes is passed on to the database purge.

    Nothing is removed for a podcast without an id. Episodes that
    are downloaded or currently being downloaded are kept.
    """
    # Remove "unreachable" episodes - episodes that have not been
    # downloaded and that the feed does not list as downloadable anymore
    # Keep episodes that are currently being downloaded, though (bug 1534)
    if self.id is not None:
        # This has to be a list (not a generator): "existing" can be
        # the very same list object as self.children, and removing
        # items from a list while lazily iterating over it would skip
        # the episode following each removed one.
        episodes_to_purge = [e for e in existing if
                e.state != gpodder.STATE_DOWNLOADED and
                e.guid not in seen_guids and not e.downloading]

        for episode in episodes_to_purge:
            logger.debug('Episode removed from feed: %s (%s)',
                    episode.title, episode.guid)
            gpodder.user_extensions.on_episode_removed_from_podcast(episode)
            self.db.delete_episode_by_guid(episode.guid, self.id)

            # Remove the episode from the "children" episodes list
            if self.children is not None:
                self.children.remove(episode)

    # This *might* cause episodes to be skipped if there were more than
    # max_episodes_per_feed items added to the feed between updates.
    # The benefit is that it prevents old episodes from appearing as new
    # in certain situations (see bug #340).
    self.db.purge(max_episodes, self.id) # TODO: Remove from self.children!

    # Sort episodes by pubdate, descending (only if episodes are loaded,
    # consistent with the "children" check in the loop above)
    if self.children is not None:
        self.children.sort(key=lambda e: e.published, reverse=True)
def update(self, max_episodes=0):
    """
    Fetch the feed of this podcast and process the result.

    max_episodes is passed on to the method consuming the feed.

    Any exception raised while fetching or consuming the feed is
    reported to the on_podcast_update_failed extension hook and then
    re-raised. On success, the on_podcast_updated hook is called, the
    common episode title prefix is re-determined and the database is
    committed.
    """
    try:
        result = self.feed_fetcher.fetch_channel(self)

        if result.status == feedcore.CUSTOM_FEED:
            self._consume_custom_feed(result.feed, max_episodes)
        elif result.status == feedcore.UPDATED_FEED:
            self._consume_updated_feed(result.feed, max_episodes)
        elif result.status == feedcore.NEW_LOCATION:
            url = result.feed.href
            logger.info('New feed location: %s => %s', self.url, url)
            if url in set(x.url for x in self.model.get_podcasts()):
                raise Exception('Already subscribed to ' + url)
            self.url = url
            self._consume_updated_feed(result.feed, max_episodes)
        elif result.status == feedcore.NOT_MODIFIED:
            # Nothing to consume, only headers are updated below
            pass

        # Keep the previous values if the headers are not sent
        if hasattr(result.feed, 'headers'):
            self.http_etag = result.feed.headers.get('etag', self.http_etag)
            self.http_last_modified = result.feed.headers.get('last-modified', self.http_last_modified)
        self.save()
    except Exception as e:
        # "Not really" errors
        #feedcore.AuthenticationRequired
        # Temporary errors
        #feedcore.Offline
        #feedcore.BadRequest
        #feedcore.InternalServerError
        #feedcore.WifiLogin
        # Permanent errors
        #feedcore.Unsubscribe
        #feedcore.NotFound
        #feedcore.InvalidFeed
        #feedcore.UnknownStatusCode
        gpodder.user_extensions.on_podcast_update_failed(self, e)
        raise

    gpodder.user_extensions.on_podcast_updated(self)

    # Re-determine the common prefix for all episodes
    self._determine_common_prefix()

    self.db.commit()
def delete(self):
    """
    Delete this podcast via the database object and then have the
    model remove it from its list of podcasts.
    """
    database = self.db
    database.delete_podcast(self)

    owner = self.model
    owner._remove_podcast(self)
def save(self):
    """
    Save this podcast to the database and register it with the model.

    A download folder is determined first if none is set yet, and the
    on_podcast_save extension hook is called before the podcast is
    written.
    """
    folder_missing = self.download_folder is None
    if folder_missing:
        # get_save_dir() assigns a download folder as a side effect
        self.get_save_dir()

    gpodder.user_extensions.on_podcast_save(self)

    self._clear_changes()

    database = self.db
    database.save_podcast(self)

    owner = self.model
    owner._append_podcast(self)
def get_statistics(self):
    """
    Get the statistics of this podcast from the database.

    Returns: The result of db.get_podcast_statistics() for this
             podcast, or a tuple of five zeros if the podcast
             does not have an id
    """
    if self.id is not None:
        return self.db.get_podcast_statistics(self.id)

    return (0, 0, 0, 0, 0)
@property
def group_by(self):
    """
    The section of this podcast.

    If no section is set yet, it is determined from the content type
    and the podcast is saved before the section is returned.
    """
    if self.section:
        return self.section

    self.section = self._get_content_type()
    self.save()
    return self.section
def _get_content_type(self):
    """
    Determine the (translated) content type name of this podcast.

    YouTube and Vimeo URLs are always "Video". Otherwise, the content
    types returned by the database for this podcast are counted by
    their "audio" / "video" prefix (everything else is "other").

    Returns: "Audio" if there are at least as many audio as video
             items, "Video" if there are more video than other
             items, and "Other" in all remaining cases
    """
    url = self.url
    if 'youtube.com' in url or 'vimeo.com' in url:
        return _('Video')

    counts = {'audio': 0, 'video': 0, 'other': 0}
    for mime_type in self.db.get_content_types(self.id):
        mime_type = mime_type.lower()
        if mime_type.startswith('audio'):
            counts['audio'] += 1
        elif mime_type.startswith('video'):
            counts['video'] += 1
        else:
            counts['other'] += 1

    # Audio wins ties against video (also when nothing was counted)
    if counts['audio'] >= counts['video']:
        return _('Audio')

    if counts['video'] > counts['other']:
        return _('Video')

    return _('Other')
def authenticate_url(self, url):
    """
    Pass url and the stored username and password of this podcast to
    util.url_add_authentication().

    Returns: The result of util.url_add_authentication()
    """
    username = self.auth_username
    password = self.auth_password
    return util.url_add_authentication(url, username, password)
def _get_cover_url(self):
    """Returns: The cover_url attribute of this podcast"""
    url = self.cover_url
    return url

# Read-only property giving access to the cover URL as "image"
image = property(fget=_get_cover_url)
def rename(self, new_title):
    """
    Give this podcast a new title and save it.

    new_title is stripped first; nothing happens if it equals the
    current title. If a unique folder name derived from the new title
    differs from the current download folder, an existing download
    folder is renamed (or its contents are moved into the already
    existing target folder) and download_folder is updated.
    """
    new_title = new_title.strip()
    if self.title == new_title:
        return

    folder_name = self.find_unique_folder_name(new_title)
    if folder_name and folder_name != self.download_folder:
        target = os.path.join(gpodder.downloads, folder_name)
        source = os.path.join(gpodder.downloads, self.download_folder)
        if os.path.exists(source):
            if os.path.exists(target):
                # Both folders exist -> move files and delete old folder
                logger.info('Moving files from %s to %s', source,
                        target)
                pattern = os.path.join(source, '*')
                for path in glob.glob(pattern):
                    shutil.move(path, target)
                logger.info('Removing %s', source)
                shutil.rmtree(source, ignore_errors=True)
            else:
                # Old folder exists, new folder does not -> simply rename
                logger.info('Renaming %s => %s', source, target)
                os.rename(source, target)
        self.download_folder = folder_name

    self.title = new_title
    self.save()
def get_downloaded_episodes(self):
    """
    Get the episodes of this podcast that have been downloaded.

    Returns: A new list with all episodes from get_all_episodes()
             for which was_downloaded() is true, in the same order
    """
    # A list comprehension always yields a list, whereas filter()
    # only does so on Python 2
    return [episode for episode in self.get_all_episodes()
            if episode.was_downloaded()]
def _determine_common_prefix(self):
    """
    Determine the prefix shared by all episode titles and store it
    in self._common_prefix.

    The prefix is empty if there are fewer than two episodes, or if
    the episodes have not been loaded yet (children is None). It is
    cut back so that it always ends with a space.
    """
    # Episodes might not have been loaded yet (children is None)
    children = self.children or []

    # We need at least 2 episodes for the prefix to be "common" ;)
    if len(children) < 2:
        self._common_prefix = ''
        return

    prefix = os.path.commonprefix([x.title for x in children])
    # The common prefix must end with a space - otherwise it's not
    # on a word boundary, and we might end up chopping off too much
    if prefix and not prefix.endswith(' '):
        # rfind() gives -1 without any space -> prefix becomes empty
        prefix = prefix[:prefix.rfind(' ')+1]

    self._common_prefix = prefix
def get_all_episodes(self):
    """
    Get the list of episodes of this podcast.

    The episodes are loaded from the database on first use (when
    children is still None); the common title prefix is determined
    right after loading.

    Returns: The children list of this podcast
    """
    if self.children is not None:
        return self.children

    self.children = self.db.load_episodes(self, self.episode_factory)
    self._determine_common_prefix()
    return self.children
def find_unique_folder_name(self, download_folder):
    """
    Find a folder name based on download_folder that can be used
    as download folder of this podcast.

    Candidates come from util.generate_names(). The first one that
    is not known to the database as a podcast download folder, or
    that already is the download folder of this podcast, is taken.

    Returns: The folder name, or None if no candidate was accepted
    """
    # Remove trailing dots to avoid errors on Windows (bug 600)
    # Also remove leading dots to avoid hidden folders on Linux
    stripped_chars = '.' + string.whitespace
    base_name = download_folder.strip(stripped_chars)

    for candidate in util.generate_names(base_name):
        taken = self.db.podcast_download_folder_exists(candidate)
        if not taken or self.download_folder == candidate:
            return candidate

    return None
def get_save_dir(self, force_new=False):
    """
    Get the directory in which the files of this podcast are stored.

    If no download folder is set yet (or force_new is true), a new
    unique folder name is derived from the title (falling back to
    the URL), stored in download_folder and the podcast is saved.
    The directory is created if it does not exist yet.

    Returns: The path of the directory below gpodder.downloads
    """
    needs_new_folder = self.download_folder is None or force_new
    if needs_new_folder:
        # we must change the folder name, because it has not been set manually
        max_length = self.MAX_FOLDERNAME_LENGTH
        fn_template = util.sanitize_filename(self.title, max_length)

        if not fn_template:
            fn_template = util.sanitize_filename(self.url,
                    self.MAX_FOLDERNAME_LENGTH)

        # Find a unique folder name for this podcast
        new_folder_name = self.find_unique_folder_name(fn_template)

        # Try removing the download folder if it has been created previously
        if self.download_folder is not None:
            old_path = os.path.join(gpodder.downloads, self.download_folder)
            try:
                # os.rmdir() only succeeds for an empty directory
                os.rmdir(old_path)
            except OSError:
                logger.info('Old download folder is kept for %s', self.url)

        logger.info('Updating download_folder of %s to %s', self.url,
                new_folder_name)
        self.download_folder = new_folder_name
        self.save()

    # Avoid encoding errors for OS-specific functions (bug 1570)
    save_dir = util.sanitize_encoding(
            os.path.join(gpodder.downloads, self.download_folder))

    # Create save_dir if it does not yet exist
    created = util.make_directory(save_dir)
    if not created:
        logger.error('Could not create save_dir: %s', save_dir)

    return save_dir

# Read-only access to get_save_dir() (with force_new=False)
save_dir = property(fget=get_save_dir)
def remove_downloaded(self):
    """
    Remove the download directory of this podcast.

    Before the directory is removed, the on_episode_delete extension
    hook is called for every downloaded episode that has a local
    filename.
    """
    for episode in self.get_downloaded_episodes():
        filename = episode.local_filename(create=False, check_only=True)
        if filename is None:
            continue

        gpodder.user_extensions.on_episode_delete(episode, filename)

    # Remove the download directory (errors are ignored)
    shutil.rmtree(self.save_dir, ignore_errors=True)
@property
def cover_file(self):
    """The path "folder" inside the save_dir of this podcast"""
    directory = self.save_dir
    return os.path.join(directory, 'folder')
class Model(object):
    """
    Keeps the list of podcasts loaded from a database object.

    Podcast objects are created through PodcastClass. The list of
    podcasts (children) is None until get_podcasts() has loaded it.
    """
    PodcastClass = PodcastChannel

    def __init__(self, db):
        self.db = db
        # Loaded on demand by get_podcasts()
        self.children = None

    def _append_podcast(self, podcast):
        """Add podcast to the list, unless it is already in there"""
        if podcast not in self.children:
            self.children.append(podcast)

    def _remove_podcast(self, podcast):
        """Remove podcast from the list and call on_podcast_delete"""
        self.children.remove(podcast)
        # The hook gets the podcast that was removed (not the model),
        # like all other on_podcast_* hooks called in this file
        gpodder.user_extensions.on_podcast_delete(podcast)

    def get_podcasts(self):
        """
        Get the list of podcasts, loading it from the database (and
        checking the download folders) on first use.

        Returns: The list of podcast objects
        """
        def podcast_factory(dct, db):
            # db is part of the factory signature, but not needed here
            return self.PodcastClass.create_from_dict(dct, self)

        if self.children is None:
            self.children = self.db.load_podcasts(podcast_factory)

            # Check download folders for changes (bug 902)
            for podcast in self.children:
                podcast.check_download_folder()

        return self.children

    def load_podcast(self, url, create=True, authentication_tokens=None,
                     max_episodes=0):
        """
        Load a podcast by passing all arguments on to PodcastClass.load()

        Returns: The result of PodcastClass.load()
        """
        return self.PodcastClass.load(self, url, create,
                                      authentication_tokens,
                                      max_episodes)

    @classmethod
    def podcast_sort_key(cls, podcast):
        """Returns: The sort key of podcast as per PodcastClass.sort_key()"""
        return cls.PodcastClass.sort_key(podcast)

    @classmethod
    def episode_sort_key(cls, episode):
        """Returns: The "published" value of episode"""
        return episode.published

    @classmethod
    def sort_episodes_by_pubdate(cls, episodes, reverse=False):
        """Sort a list of PodcastEpisode objects chronologically

        Returns an iterable, sorted sequence of the episodes
        """
        return sorted(episodes, key=cls.episode_sort_key, reverse=reverse)