1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2012 Thomas Perl and the gPodder Team
5 # Copyright (c) 2011 Neal H. Walfield
7 # gPodder is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # gPodder is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
23 # gpodder.model - Core model classes for gPodder (2009-08-13)
24 # Based on libpodcasts.py (thp, 2005-10-29)
28 from gpodder
import util
29 from gpodder
import feedcore
30 from gpodder
import youtube
31 from gpodder
import vimeo
32 from gpodder
import schema
33 from gpodder
import coverart
# Module-level logger for this module (standard gPodder logging setup).
logger = logging.getLogger(__name__)
def get_payment_priority(url):
    """Sort key for payment-provider URLs: lower value = preferred provider.

    at the moment we only support flattr.com as an payment provider, so we
    sort the payment providers and prefer flattr.com ("1" is higher priority than "2")
    """
    # The visible body fell through without returning a value, which makes
    # sorted(..., key=get_payment_priority) order arbitrarily; restore the
    # priorities described by the docstring (flattr.com sorts first).
    if 'flattr.com' in url:
        return 1
    return 2
class CustomFeed(feedcore.ExceptionWithData):
    """Exception-with-data subclass used for custom (handler-provided) feeds."""
class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder.
    """
    # NOTE(review): lines are missing from this view of the file here --
    # the class-level "custom_handlers" list (referenced by fetch_channel
    # and register) and the "def __init__(self):" header for the call
    # below are not visible; confirm against VCS.
    feedcore.Fetcher.__init__(self, gpodder.user_agent)
74 def fetch_channel(self
, channel
):
75 etag
= channel
.http_etag
76 modified
= feedparser
._parse
_date
(channel
.http_last_modified
)
77 # If we have a username or password, rebuild the url with them included
78 # Note: using a HTTPBasicAuthHandler would be pain because we need to
79 # know the realm. It can be done, but I think this method works, too
80 url
= channel
.authenticate_url(channel
.url
)
81 for handler
in self
.custom_handlers
:
82 custom_feed
= handler
.handle_url(url
)
83 if custom_feed
is not None:
84 return feedcore
.Result(feedcore
.CUSTOM_FEED
, custom_feed
)
85 return self
.fetch(url
, etag
, modified
)
    def _resolve_url(self, url):
        # Map YouTube/Vimeo page URLs to their real channel feed URLs.
        url = youtube.get_real_channel_url(url)
        url = vimeo.get_real_channel_url(url)
        # NOTE(review): the tail of this method (presumably "return url")
        # is missing from this view of the file -- confirm against VCS.
    def register(cls, handler):
        # Register a custom URL handler that fetch_channel() will consult.
        # NOTE(review): the @classmethod decorator expected above this line
        # is not visible in this view of the file (the module-level alias
        # "register_custom_handler = gPodderFetcher.register" implies it).
        cls.custom_handlers.append(handler)
# The "register" method is exposed here for external usage
# (module-level alias so callers don't need to reference the class).
register_custom_handler = gPodderFetcher.register
101 # database -> podcast -> episode -> download/playback
102 # podcast.parent == db
103 # podcast.children == [episode, ...]
104 # episode.parent == podcast
106 # - normally: episode.children = (None, None)
107 # - downloading: episode.children = (DownloadTask(), None)
108 # - playback: episode.children = (None, PlaybackTask())
class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """
    # 'changed' holds original attribute values while change tracking is
    # active (see __setattr__ / _clear_changes).
    __slots__ = ('id', 'parent', 'children', 'changed')

    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        # NOTE(review): lines are missing from this view of the file --
        # the expected @classmethod decorator, the object construction,
        # and the setattr/return tail of this method are not visible.
        # XXX: all(map(lambda k: hasattr(o, k), d))?
        for k, v in d.iteritems():
    def __setattr__(self, name, value):
        """Track changes once "self.changed" is a dictionary

        The changed values will be stored in self.changed until
        _clear_changes is called.
        """
        # Tracking is active only once 'changed' exists and the object has
        # a database ID (i.e. not during initialization).
        if getattr(self, 'changed', None) is not None and self.id is not None:
            old_value = getattr(self, name, None)

            if old_value is not None and value != old_value:
                # Value changed (and it is not an initialization)
                # Only the first change records the old value, so 'changed'
                # maps each attribute name to its original (pre-change) value.
                if name not in self.changed:
                    self.changed[name] = old_value
                    # logger.debug("%s: %s.%s changed: %s -> %s"
                    # % (self.__class__.__name__, self.id, name,
                    #    old_value, value))

        super(PodcastModelObject, self).__setattr__(name, value)
155 def _clear_changes(self
):
156 # logger.debug("Changes: %s: %s"
157 # % ([getattr (self, a) for a in self.__slots__],
158 # str(self.changed),))
class PodcastEpisode(PodcastModelObject):
    """holds data for one object in a channel"""
    # Hard upper bound for generated download file names (see find_unique_file_name).
    MAX_FILENAME_LENGTH = 200

    # Attribute slots come straight from the database schema definition.
    __slots__ = schema.EpisodeColumns

    def _deprecated(self):
        # Shared fget/fset target that poisons the removed properties below.
        raise Exception('Property is deprecated!')

    # Old API surface: any access raises via _deprecated (mark() maps the
    # old names onto is_new / archive instead).
    is_played = property(fget=_deprecated, fset=_deprecated)
    is_locked = property(fget=_deprecated, fset=_deprecated)
173 def has_website_link(self
):
174 return bool(self
.link
) and (self
.link
!= self
.url
or \
175 youtube
.is_video_link(self
.link
))
178 def from_feedparser_entry(cls
, entry
, channel
):
179 episode
= cls(channel
)
180 episode
.guid
= entry
.get('id', '')
182 # Replace multi-space and newlines with single space (Maemo bug 11173)
183 episode
.title
= re
.sub('\s+', ' ', entry
.get('title', ''))
184 episode
.link
= entry
.get('link', '')
185 if 'content' in entry
and len(entry
['content']) and \
186 entry
['content'][0].get('type', '') == 'text/html':
187 episode
.description
= entry
['content'][0].value
189 episode
.description
= entry
.get('summary', '')
191 # Fallback to subtitle if summary is not available
192 if not episode
.description
:
193 episode
.description
= entry
.get('subtitle', '')
198 # Parse iTunes-specific podcast duration metadata
199 itunes_duration
= entry
.get('itunes_duration', '')
201 total_time
= util
.parse_time(itunes_duration
)
203 # Parse time from YouTube descriptions if it's a YouTube feed
204 if youtube
.is_youtube_guid(episode
.guid
):
205 result
= re
.search(r
'Time:<[^>]*>\n<[^>]*>([:0-9]*)<',
208 youtube_duration
= result
.group(1)
209 total_time
= util
.parse_time(youtube_duration
)
211 episode
.total_time
= total_time
215 episode
.published
= feedcore
.get_pubdate(entry
)
217 enclosures
= entry
.get('enclosures', [])
218 media_rss_content
= entry
.get('media_content', [])
219 audio_available
= any(e
.get('type', '').startswith('audio/') \
220 for e
in enclosures
+ media_rss_content
)
221 video_available
= any(e
.get('type', '').startswith('video/') \
222 for e
in enclosures
+ media_rss_content
)
224 # XXX: Make it possible for hooks/extensions to override this by
225 # giving them a list of enclosures and the "self" object (podcast)
226 # and letting them sort and/or filter the list of enclosures to
227 # get the desired enclosure picked by the algorithm below.
228 filter_and_sort_enclosures
= lambda x
: x
230 # read the flattr auto-url, if exists
231 payment_info
= [link
['href'] for link
in entry
.get('links', [])
232 if link
['rel'] == 'payment']
234 episode
.payment_url
= sorted(payment_info
, key
=get_payment_priority
)[0]
237 for e
in filter_and_sort_enclosures(enclosures
):
238 episode
.mime_type
= e
.get('type', 'application/octet-stream')
239 if episode
.mime_type
== '':
240 # See Maemo bug 10036
241 logger
.warn('Fixing empty mimetype in ugly feed')
242 episode
.mime_type
= 'application/octet-stream'
244 if '/' not in episode
.mime_type
:
247 # Skip images in feeds if audio or video is available (bug 979)
248 # This must (and does) also look in Media RSS enclosures (bug 1430)
249 if episode
.mime_type
.startswith('image/') and \
250 (audio_available
or video_available
):
253 # If we have audio or video available later on, skip
254 # 'application/octet-stream' data types (fixes Linux Outlaws)
255 if episode
.mime_type
== 'application/octet-stream' and \
256 (audio_available
or video_available
):
259 episode
.url
= util
.normalize_feed_url(e
.get('href', ''))
264 episode
.file_size
= int(e
.length
) or -1
266 episode
.file_size
= -1
271 for m
in filter_and_sort_enclosures(media_rss_content
):
272 episode
.mime_type
= m
.get('type', 'application/octet-stream')
273 if '/' not in episode
.mime_type
:
276 # Skip images in Media RSS if we have audio/video (bug 1444)
277 if episode
.mime_type
.startswith('image/') and \
278 (audio_available
or video_available
):
281 episode
.url
= util
.normalize_feed_url(m
.get('url', ''))
286 episode
.file_size
= int(m
.get('filesize', 0)) or -1
288 episode
.file_size
= -1
291 episode
.total_time
= int(m
.get('duration', 0)) or 0
293 episode
.total_time
= 0
297 # Brute-force detection of any links
298 for l
in entry
.get('links', ()):
299 episode
.url
= util
.normalize_feed_url(l
.get('href', ''))
303 if (youtube
.is_video_link(episode
.url
) or \
304 vimeo
.is_video_link(episode
.url
)):
307 # Check if we can resolve this link to a audio/video file
308 filename
, extension
= util
.filename_from_url(episode
.url
)
309 file_type
= util
.file_type_by_extension(extension
)
310 if file_type
is None and hasattr(l
, 'type'):
311 extension
= util
.extension_from_mimetype(l
.type)
312 file_type
= util
.file_type_by_extension(extension
)
314 # The link points to a audio or video file - use it!
315 if file_type
is not None:
320 def __init__(self
, channel
):
321 self
.parent
= channel
322 self
.podcast_id
= self
.parent
.id
323 self
.children
= (None, None)
329 self
.mime_type
= 'application/octet-stream'
331 self
.description
= ''
334 self
.download_filename
= None
335 self
.payment_url
= None
337 self
.state
= gpodder
.STATE_NORMAL
339 self
.archive
= channel
.auto_archive_episodes
343 self
.current_position
= 0
344 self
.current_position_updated
= 0
346 # Timestamp of last playback time
347 self
.last_playback
= 0
355 return self
.parent
.parent
.db
358 def trimmed_title(self
):
359 """Return the title with the common prefix trimmed"""
360 # Minimum amount of leftover characters after trimming. This
361 # avoids things like "Common prefix 123" to become just "123".
362 # If there are LEFTOVER_MIN or less characters after trimming,
363 # the original title will be returned without trimming.
366 # "Podcast Name - Title" and "Podcast Name: Title" -> "Title"
367 for postfix
in (' - ', ': '):
368 prefix
= self
.parent
.title
+ postfix
369 if (self
.title
.startswith(prefix
) and
370 len(self
.title
)-len(prefix
) > LEFTOVER_MIN
):
371 return self
.title
[len(prefix
):]
374 # "Podcast Name <number>: ..." -> "<number>: ..."
375 r
'^%s (\d+: .*)' % re
.escape(self
.parent
.title
),
377 # "Episode <number>: ..." -> "<number>: ..."
381 for pattern
in regex_patterns
:
382 if re
.match(pattern
, self
.title
):
383 title
= re
.sub(pattern
, r
'\1', self
.title
)
384 if len(title
) > LEFTOVER_MIN
:
387 # "#001: Title" -> "001: Title"
388 if (not self
.parent
._common
_prefix
and re
.match('^#\d+: ',
389 self
.title
) and len(self
.title
)-1 > LEFTOVER_MIN
):
390 return self
.title
[1:]
392 if (self
.parent
._common
_prefix
is not None and
393 self
.title
.startswith(self
.parent
._common
_prefix
) and
394 len(self
.title
)-len(self
.parent
._common
_prefix
) > LEFTOVER_MIN
):
395 return self
.title
[len(self
.parent
._common
_prefix
):]
    def _set_download_task(self, download_task):
        # children == (download task, playback task); replace slot 0 only.
        self.children = (download_task, self.children[1])

    def _get_download_task(self):
        # children == (download task, playback task); read slot 0.
        return self.children[0]

    # Current DownloadTask (or None) exposed as a read/write property.
    download_task = property(_get_download_task, _set_download_task)
408 def downloading(self
):
409 task
= self
.download_task
413 return task
.status
in (task
.DOWNLOADING
, task
.QUEUED
, task
.PAUSED
)
415 def check_is_new(self
):
416 return (self
.state
== gpodder
.STATE_NORMAL
and self
.is_new
and
417 not self
.downloading
)
420 gpodder
.user_extensions
.on_episode_save(self
)
422 self
._clear
_changes
()
424 self
.db
.save_episode(self
)
426 def on_downloaded(self
, filename
):
427 self
.state
= gpodder
.STATE_DOWNLOADED
429 self
.file_size
= os
.path
.getsize(filename
)
432 def set_state(self
, state
):
436 def playback_mark(self
):
438 self
.last_playback
= int(time
.time())
441 def mark(self
, state
=None, is_played
=None, is_locked
=None):
442 if state
is not None:
444 if is_played
is not None:
445 self
.is_new
= not is_played
447 # "Mark as new" must "undelete" the episode
448 if self
.is_new
and self
.state
== gpodder
.STATE_DELETED
:
449 self
.state
= gpodder
.STATE_NORMAL
450 if is_locked
is not None:
451 self
.archive
= is_locked
454 def age_in_days(self
):
455 return util
.file_age_in_days(self
.local_filename(create
=False, \
458 age_int_prop
= property(fget
=age_in_days
)
460 def get_age_string(self
):
461 return util
.file_age_to_string(self
.age_in_days())
463 age_prop
= property(fget
=get_age_string
)
466 def description_html(self
):
467 # XXX: That's not a very well-informed heuristic to check
468 # if the description already contains HTML. Better ideas?
469 if '<' in self
.description
:
470 return self
.description
472 return self
.description
.replace('\n', '<br>')
474 def one_line_description(self
):
475 MAX_LINE_LENGTH
= 120
476 desc
= util
.remove_html_tags(self
.description
or '')
477 desc
= re
.sub('\s+', ' ', desc
).strip()
479 return _('No description available')
481 # Decode the description to avoid gPodder bug 1277
482 desc
= util
.convert_bytes(desc
).strip()
484 if len(desc
) > MAX_LINE_LENGTH
:
485 return desc
[:MAX_LINE_LENGTH
] + '...'
    def delete_from_disk(self):
        """Remove the downloaded file (if any) and mark the episode deleted."""
        filename = self.local_filename(create=False, check_only=True)
        if filename is not None:
            # Let extensions react before the file actually disappears.
            gpodder.user_extensions.on_episode_delete(self, filename)
            util.delete_file(filename)

        self.set_state(gpodder.STATE_DELETED)
497 def get_playback_url(self
, fmt_ids
=None, allow_partial
=False):
498 """Local (or remote) playback/streaming filename/URL
500 Returns either the local filename or a streaming URL that
501 can be used to playback this episode.
503 Also returns the filename of a partially downloaded file
504 in case partial (preview) playback is desired.
506 url
= self
.local_filename(create
=False)
508 if (allow_partial
and url
is not None and
509 os
.path
.exists(url
+ '.partial')):
510 return url
+ '.partial'
512 if url
is None or not os
.path
.exists(url
):
514 url
= youtube
.get_real_download_url(url
, fmt_ids
)
515 url
= vimeo
.get_real_download_url(url
)
519 def find_unique_file_name(self
, filename
, extension
):
520 # Remove leading and trailing whitespace + dots (to avoid hidden files)
521 filename
= filename
.strip('.' + string
.whitespace
) + extension
523 for name
in util
.generate_names(filename
):
524 if (not self
.db
.episode_filename_exists(self
.podcast_id
, name
) or
525 self
.download_filename
== name
):
528 def local_filename(self
, create
, force_update
=False, check_only
=False,
529 template
=None, return_wanted_filename
=False):
530 """Get (and possibly generate) the local saving filename
532 Pass create=True if you want this function to generate a
533 new filename if none exists. You only want to do this when
534 planning to create/download the file after calling this function.
536 Normally, you should pass create=False. This will only
537 create a filename when the file already exists from a previous
538 version of gPodder (where we used md5 filenames). If the file
539 does not exist (and the filename also does not exist), this
540 function will return None.
542 If you pass force_update=True to this function, it will try to
543 find a new (better) filename and move the current file if this
544 is the case. This is useful if (during the download) you get
545 more information about the file, e.g. the mimetype and you want
546 to include this information in the file name generation process.
548 If check_only=True is passed to this function, it will never try
549 to rename the file, even if would be a good idea. Use this if you
550 only want to check if a file exists.
552 If "template" is specified, it should be a filename that is to
553 be used as a template for generating the "real" filename.
555 The generated filename is stored in the database for future access.
557 If return_wanted_filename is True, the filename will not be written to
558 the database, but simply returned by this function (for use by the
559 "import external downloads" feature).
561 if self
.download_filename
is None and (check_only
or not create
):
564 ext
= self
.extension(may_call_local_filename
=False).encode('utf-8', 'ignore')
566 if not check_only
and (force_update
or not self
.download_filename
):
567 # Avoid and catch gPodder bug 1440 and similar situations
569 logger
.warn('Empty template. Report this podcast URL %s',
573 # Try to find a new filename for the current file
574 if template
is not None:
575 # If template is specified, trust the template's extension
576 episode_filename
, ext
= os
.path
.splitext(template
)
578 episode_filename
, _
= util
.filename_from_url(self
.url
)
579 fn_template
= util
.sanitize_filename(episode_filename
, self
.MAX_FILENAME_LENGTH
)
581 if 'redirect' in fn_template
and template
is None:
582 # This looks like a redirection URL - force URL resolving!
583 logger
.warn('Looks like a redirection to me: %s', self
.url
)
584 url
= util
.get_real_url(self
.channel
.authenticate_url(self
.url
))
585 logger
.info('Redirection resolved to: %s', url
)
586 episode_filename
, _
= util
.filename_from_url(url
)
587 fn_template
= util
.sanitize_filename(episode_filename
, self
.MAX_FILENAME_LENGTH
)
589 # Use title for YouTube downloads and Soundcloud streams
590 if youtube
.is_video_link(self
.url
) or fn_template
== 'stream':
591 sanitized
= util
.sanitize_filename(self
.title
, self
.MAX_FILENAME_LENGTH
)
593 fn_template
= sanitized
595 # If the basename is empty, use the md5 hexdigest of the URL
596 if not fn_template
or fn_template
.startswith('redirect.'):
597 logger
.error('Report this feed: Podcast %s, episode %s',
598 self
.channel
.url
, self
.url
)
599 fn_template
= hashlib
.md5(self
.url
).hexdigest()
601 # Find a unique filename for this episode
602 wanted_filename
= self
.find_unique_file_name(fn_template
, ext
)
604 if return_wanted_filename
:
605 # return the calculated filename without updating the database
606 return wanted_filename
608 # The old file exists, but we have decided to want a different filename
609 if self
.download_filename
and wanted_filename
!= self
.download_filename
:
610 # there might be an old download folder crawling around - move it!
611 new_file_name
= os
.path
.join(self
.channel
.save_dir
, wanted_filename
)
612 old_file_name
= os
.path
.join(self
.channel
.save_dir
, self
.download_filename
)
613 if os
.path
.exists(old_file_name
) and not os
.path
.exists(new_file_name
):
614 logger
.info('Renaming %s => %s', old_file_name
, new_file_name
)
615 os
.rename(old_file_name
, new_file_name
)
616 elif force_update
and not os
.path
.exists(old_file_name
):
617 # When we call force_update, the file might not yet exist when we
618 # call it from the downloading code before saving the file
619 logger
.info('Choosing new filename: %s', new_file_name
)
621 logger
.warn('%s exists or %s does not', new_file_name
, old_file_name
)
622 logger
.info('Updating filename of %s to "%s".', self
.url
, wanted_filename
)
623 elif self
.download_filename
is None:
624 logger
.info('Setting download filename: %s', wanted_filename
)
625 self
.download_filename
= wanted_filename
628 return os
.path
.join(util
.sanitize_encoding(self
.channel
.save_dir
),
629 util
.sanitize_encoding(self
.download_filename
))
631 def set_mimetype(self
, mimetype
, commit
=False):
632 """Sets the mimetype for this episode"""
633 self
.mime_type
= mimetype
637 def extension(self
, may_call_local_filename
=True):
638 filename
, ext
= util
.filename_from_url(self
.url
)
639 if may_call_local_filename
:
640 filename
= self
.local_filename(create
=False)
641 if filename
is not None:
642 filename
, ext
= os
.path
.splitext(filename
)
643 # if we can't detect the extension from the url fallback on the mimetype
644 if ext
== '' or util
.file_type_by_extension(ext
) is None:
645 ext
= util
.extension_from_mimetype(self
.mime_type
)
656 def file_exists(self
):
657 filename
= self
.local_filename(create
=False, check_only
=True)
661 return os
.path
.exists(filename
)
663 def was_downloaded(self
, and_exists
=False):
664 if self
.state
!= gpodder
.STATE_DOWNLOADED
:
666 if and_exists
and not self
.file_exists():
670 def sync_filename(self
, use_custom
=False, custom_format
=None):
672 return util
.object_string_formatter(custom_format
,
673 episode
=self
, podcast
=self
.channel
)
678 # Assume all YouTube/Vimeo links are video files
679 if youtube
.is_video_link(self
.url
) or vimeo
.is_video_link(self
.url
):
682 return util
.file_type_by_extension(self
.extension())
686 return os
.path
.splitext( os
.path
.basename( self
.url
))[0]
691 Returns published time as HHMM (or 0000 if not available)
694 return datetime
.datetime
.fromtimestamp(self
.published
).strftime('%H%M')
696 logger
.warn('Cannot format pubtime: %s', self
.title
, exc_info
=True)
699 def playlist_title(self
):
700 """Return a title for this episode in a playlist
702 The title will be composed of the podcast name, the
703 episode name and the publication date. The return
704 value is the canonical representation of this episode
705 in playlists (for example, M3U playlists).
707 return '%s - %s (%s)' % (self
.channel
.title
, \
711 def cute_pubdate(self
):
712 result
= util
.format_date(self
.published
)
714 return '(%s)' % _('unknown')
718 pubdate_prop
= property(fget
=cute_pubdate
)
722 return str(datetime
.datetime
.fromtimestamp(self
.published
).strftime('%F'))
724 def calculate_filesize(self
):
725 filename
= self
.local_filename(create
=False)
730 self
.file_size
= os
.path
.getsize(filename
)
732 logger
.error('Could not get file size: %s', filename
, exc_info
=True)
    def is_finished(self):
        """Return True if this episode is considered "finished playing"

        An episode is considered "finished" when there is a
        current position mark on the track, and when the
        current position is greater than 99 percent of the
        total time or inside the last 10 seconds of a track.
        """
        return self.current_position > 0 and self.total_time > 0 and \
                (self.current_position + 10 >= self.total_time or \
                 self.current_position >= self.total_time*.99)
746 def get_play_info_string(self
, duration_only
=False):
747 duration
= util
.format_time(self
.total_time
)
748 if duration_only
and self
.total_time
> 0:
750 elif self
.current_position
> 0 and \
751 self
.current_position
!= self
.total_time
:
752 position
= util
.format_time(self
.current_position
)
753 return '%s / %s' % (position
, duration
)
754 elif self
.total_time
> 0:
759 def update_from(self
, episode
):
760 for k
in ('title', 'url', 'description', 'link', 'published', 'guid', 'file_size', 'payment_url'):
761 setattr(self
, k
, getattr(episode
, k
))
764 class PodcastChannel(PodcastModelObject
):
765 __slots__
= schema
.PodcastColumns
+ ('_common_prefix',)
767 UNICODE_TRANSLATE
= {ord(u
'ö'): u
'o', ord(u
'ä'): u
'a', ord(u
'ü'): u
'u'}
769 # Enumerations for download strategy
770 STRATEGY_DEFAULT
, STRATEGY_LATEST
= range(2)
772 # Description and ordering of strategies
774 (STRATEGY_DEFAULT
, _('Default')),
775 (STRATEGY_LATEST
, _('Only keep latest')),
778 MAX_FOLDERNAME_LENGTH
= 60
779 SECONDS_PER_WEEK
= 7*24*60*60
780 EpisodeClass
= PodcastEpisode
782 feed_fetcher
= gPodderFetcher()
784 def __init__(self
, model
):
792 self
.description
= ''
793 self
.cover_url
= None
794 self
.payment_url
= None
796 self
.auth_username
= ''
797 self
.auth_password
= ''
799 self
.http_last_modified
= None
800 self
.http_etag
= None
802 self
.auto_archive_episodes
= False
803 self
.download_folder
= None
804 self
.pause_subscription
= False
805 self
.sync_to_mp3_player
= True
807 self
.section
= _('Other')
808 self
._common
_prefix
= None
809 self
.download_strategy
= PodcastChannel
.STRATEGY_DEFAULT
817 return self
.parent
.db
    def get_download_strategies(self):
        # Yield (is_selected, value, caption) for every known strategy.
        for value, caption in PodcastChannel.STRATEGIES:
            yield self.download_strategy == value, value, caption
823 def set_download_strategy(self
, download_strategy
):
824 if download_strategy
== self
.download_strategy
:
827 caption
= dict(self
.STRATEGIES
).get(download_strategy
)
828 if caption
is not None:
829 logger
.debug('Strategy for %s changed to %s', self
.title
, caption
)
830 self
.download_strategy
= download_strategy
832 logger
.warn('Cannot set strategy to %d', download_strategy
)
834 def check_download_folder(self
):
835 """Check the download folder for externally-downloaded files
837 This will try to assign downloaded files with episodes in the
840 This will also cause missing files to be marked as deleted.
844 for episode
in self
.get_downloaded_episodes():
845 if episode
.was_downloaded():
846 filename
= episode
.local_filename(create
=False)
847 if not os
.path
.exists(filename
):
848 # File has been deleted by the user - simulate a
849 # delete event (also marks the episode as deleted)
850 logger
.debug('Episode deleted: %s', filename
)
851 episode
.delete_from_disk()
854 known_files
.add(filename
)
856 existing_files
= set(filename
for filename
in \
857 glob
.glob(os
.path
.join(self
.save_dir
, '*')) \
858 if not filename
.endswith('.partial'))
860 ignore_files
= ['folder'+ext
for ext
in
861 coverart
.CoverDownloader
.EXTENSIONS
]
863 external_files
= existing_files
.difference(list(known_files
) +
864 [os
.path
.join(self
.save_dir
, ignore_file
)
865 for ignore_file
in ignore_files
])
866 if not external_files
:
869 all_episodes
= self
.get_all_episodes()
871 for filename
in external_files
:
874 basename
= os
.path
.basename(filename
)
875 existing
= [e
for e
in all_episodes
if e
.download_filename
== basename
]
877 existing
= existing
[0]
878 logger
.info('Importing external download: %s', filename
)
879 existing
.on_downloaded(filename
)
882 for episode
in all_episodes
:
883 wanted_filename
= episode
.local_filename(create
=True, \
884 return_wanted_filename
=True)
885 if basename
== wanted_filename
:
886 logger
.info('Importing external download: %s', filename
)
887 episode
.download_filename
= basename
888 episode
.on_downloaded(filename
)
892 wanted_base
, wanted_ext
= os
.path
.splitext(wanted_filename
)
893 target_base
, target_ext
= os
.path
.splitext(basename
)
894 if wanted_base
== target_base
:
895 # Filenames only differ by the extension
896 wanted_type
= util
.file_type_by_extension(wanted_ext
)
897 target_type
= util
.file_type_by_extension(target_ext
)
899 # If wanted type is None, assume that we don't know
900 # the right extension before the download (e.g. YouTube)
901 # if the wanted type is the same as the target type,
902 # assume that it's the correct file
903 if wanted_type
is None or wanted_type
== target_type
:
904 logger
.info('Importing external download: %s', filename
)
905 episode
.download_filename
= basename
906 episode
.on_downloaded(filename
)
910 if not found
and not util
.is_system_file(filename
):
911 logger
.warn('Unknown external file: %s', filename
)
    def sort_key(cls, podcast):
        # Sort key: lowercased title with a leading "the " dropped and
        # umlauts transliterated via UNICODE_TRANSLATE.
        # NOTE(review): the @classmethod decorator expected above this
        # line is not visible in this view of the file.
        key = util.convert_bytes(podcast.title.lower())
        return re.sub('^the ', '', key).translate(cls.UNICODE_TRANSLATE)
919 def load(cls
, model
, url
, create
=True, authentication_tokens
=None,\
921 if isinstance(url
, unicode):
922 url
= url
.encode('utf-8')
924 existing
= filter(lambda p
: p
.url
== url
, model
.get_podcasts())
932 if authentication_tokens
is not None:
933 tmp
.auth_username
= authentication_tokens
[0]
934 tmp
.auth_password
= authentication_tokens
[1]
936 # Save podcast, so it gets an ID assigned before
937 # updating the feed and adding saving episodes
941 tmp
.update(max_episodes
)
943 logger
.debug('Fetch failed. Removing buggy feed.')
944 tmp
.remove_downloaded()
948 # Determine the section in which this podcast should appear
949 tmp
.section
= tmp
._get
_content
_type
()
951 # Determine a new download folder now that we have the title
952 tmp
.get_save_dir(force_new
=True)
954 # Mark episodes as downloaded if files already exist (bug 902)
955 tmp
.check_download_folder()
957 # Determine common prefix of episode titles
958 tmp
._determine
_common
_prefix
()
962 gpodder
.user_extensions
.on_podcast_subscribe(tmp
)
    def episode_factory(self, d):
        """
        This function takes a dictionary containing key-value pairs for
        episodes and returns a new PodcastEpisode object that is connected
        to this podcast.

        Returns: A new PodcastEpisode object
        """
        return self.EpisodeClass.create_from_dict(d, self)
976 def _consume_updated_title(self
, new_title
):
977 # Replace multi-space and newlines with single space (Maemo bug 11173)
978 new_title
= re
.sub('\s+', ' ', new_title
).strip()
980 # Only update the podcast-supplied title when we
981 # don't yet have a title, or if the title is the
982 # feed URL (e.g. we didn't find a title before).
983 if not self
.title
or self
.title
== self
.url
:
984 self
.title
= new_title
986 # Start YouTube- and Vimeo-specific title FIX
987 YOUTUBE_PREFIX
= 'Uploads by '
988 VIMEO_PREFIX
= 'Vimeo / '
989 if self
.title
.startswith(YOUTUBE_PREFIX
):
990 self
.title
= self
.title
[len(YOUTUBE_PREFIX
):] + ' on YouTube'
991 elif self
.title
.startswith(VIMEO_PREFIX
):
992 self
.title
= self
.title
[len(VIMEO_PREFIX
):] + ' on Vimeo'
993 # End YouTube- and Vimeo-specific title FIX
995 def _consume_metadata(self
, title
, link
, description
, cover_url
,
997 self
._consume
_updated
_title
(title
)
999 self
.description
= description
1000 self
.cover_url
= cover_url
1001 self
.payment_url
= payment_url
1004 def _consume_custom_feed(self
, custom_feed
, max_episodes
=0):
1005 self
._consume
_metadata
(custom_feed
.get_title(),
1006 custom_feed
.get_link(),
1007 custom_feed
.get_description(),
1008 custom_feed
.get_image(),
1011 existing
= self
.get_all_episodes()
1012 existing_guids
= [episode
.guid
for episode
in existing
]
1014 # Insert newly-found episodes into the database + local cache
1015 new_episodes
, seen_guids
= custom_feed
.get_new_episodes(self
, existing_guids
)
1016 self
.children
.extend(new_episodes
)
1018 self
.remove_unreachable_episodes(existing
, seen_guids
, max_episodes
)
1020 def _consume_updated_feed(self
, feed
, max_episodes
=0):
1022 if hasattr(feed
.feed
, 'image'):
1023 for attribute
in ('href', 'url'):
1024 new_value
= getattr(feed
.feed
.image
, attribute
, None)
1025 if new_value
is not None:
1026 cover_url
= new_value
1027 elif hasattr(feed
.feed
, 'icon'):
1028 cover_url
= feed
.feed
.icon
1032 # Payment URL (Flattr auto-payment) information
1033 payment_info
= [link
['href'] for link
in feed
.feed
.get('links', [])
1034 if link
['rel'] == 'payment']
1036 payment_url
= sorted(payment_info
, key
=get_payment_priority
)[0]
1040 self
._consume
_metadata
(feed
.feed
.get('title', self
.url
),
1041 feed
.feed
.get('link', self
.link
),
1042 feed
.feed
.get('subtitle', self
.description
),
1046 # Load all episodes to update them properly.
1047 existing
= self
.get_all_episodes()
1049 # We have to sort the entries in descending chronological order,
1050 # because if the feed lists items in ascending order and has >
1051 # max_episodes old episodes, new episodes will not be shown.
1052 # See also: gPodder Bug 1186
1053 entries
= sorted(feed
.entries
, key
=feedcore
.get_pubdate
, reverse
=True)
1055 # We can limit the maximum number of entries that gPodder will parse
1056 if max_episodes
> 0 and len(entries
) > max_episodes
:
1057 entries
= entries
[:max_episodes
]
1059 # GUID-based existing episode list
1060 existing_guids
= dict((e
.guid
, e
) for e
in existing
)
1062 # Get most recent published of all episodes
1063 last_published
= self
.db
.get_last_published(self
) or 0
1065 # Keep track of episode GUIDs currently seen in the feed
1068 # Number of new episodes found
1071 # Search all entries for new episodes
1072 for entry
in entries
:
1073 episode
= self
.EpisodeClass
.from_feedparser_entry(entry
, self
)
1074 if episode
is not None:
1075 if not episode
.title
:
1076 logger
.warn('Using filename as title for %s', episode
.url
)
1077 basename
= os
.path
.basename(episode
.url
)
1078 episode
.title
, ext
= os
.path
.splitext(basename
)
1081 if not episode
.guid
:
1082 logger
.warn('Using download URL as GUID for %s', episode
.title
)
1083 episode
.guid
= episode
.url
1085 seen_guids
.add(episode
.guid
)
1089 # Detect (and update) existing episode based on GUIDs
1090 existing_episode
= existing_guids
.get(episode
.guid
, None)
1091 if existing_episode
:
1092 existing_episode
.update_from(episode
)
1093 existing_episode
.save()
1096 # Workaround for bug 340: If the episode has been
1097 # published earlier than one week before the most
1098 # recent existing episode, do not mark it as new.
1099 if episode
.published
< last_published
- self
.SECONDS_PER_WEEK
:
1100 logger
.debug('Episode with old date: %s', episode
.title
)
1101 episode
.is_new
= False
1106 # Only allow a certain number of new episodes per update
1107 if (self
.download_strategy
== PodcastChannel
.STRATEGY_LATEST
and
1109 episode
.is_new
= False
1112 self
.children
.append(episode
)
1114 self
.remove_unreachable_episodes(existing
, seen_guids
, max_episodes
)
def remove_unreachable_episodes(self, existing, seen_guids, max_episodes):
    """Drop stored episodes that have disappeared from the feed.

    An episode is purged only when it is not downloaded, is not
    currently downloading (bug 1534) and its GUID is no longer listed
    in the feed. Afterwards the database is trimmed to max_episodes
    and the in-memory list is re-sorted newest-first.
    """
    if self.id is not None:
        for episode in existing:
            # Keep anything the user already has on disk ...
            if episode.state == gpodder.STATE_DOWNLOADED:
                continue
            # ... or that is still listed / currently being fetched
            if episode.guid in seen_guids or episode.downloading:
                continue

            logger.debug('Episode removed from feed: %s (%s)',
                    episode.title, episode.guid)
            gpodder.user_extensions.on_episode_removed_from_podcast(episode)
            self.db.delete_episode_by_guid(episode.guid, self.id)

            # Keep the in-memory episode list in sync with the database
            if self.children is not None:
                self.children.remove(episode)

    # This *might* cause episodes to be skipped if there were more than
    # max_episodes_per_feed items added to the feed between updates.
    # The benefit is that it prevents old episodes from apearing as new
    # in certain situations (see bug #340).
    self.db.purge(max_episodes, self.id)  # TODO: Remove from self.children!

    # Sort episodes by pubdate, descending
    self.children.sort(key=lambda ep: ep.published, reverse=True)
def update(self, max_episodes=0):
    """Fetch this podcast's feed and merge the result into the database.

    max_episodes limits how many entries are parsed (0 = unlimited).
    On failure, extensions are notified and the exception is re-raised.
    """
    try:
        result = self.feed_fetcher.fetch_channel(self)

        if result.status == feedcore.CUSTOM_FEED:
            self._consume_custom_feed(result.feed, max_episodes)
        elif result.status == feedcore.UPDATED_FEED:
            self._consume_updated_feed(result.feed, max_episodes)
        elif result.status == feedcore.NEW_LOCATION:
            url = result.feed.href
            logger.info('New feed location: %s => %s', self.url, url)
            if url in set(x.url for x in self.model.get_podcasts()):
                raise Exception('Already subscribed to ' + url)
            # Follow the permanent redirect for future updates
            self.url = url
            self._consume_updated_feed(result.feed, max_episodes)
        elif result.status == feedcore.NOT_MODIFIED:
            pass

        # Remember the HTTP cache validators so the next fetch can be
        # conditional (If-None-Match / If-Modified-Since)
        if hasattr(result.feed, 'headers'):
            self.http_etag = result.feed.headers.get('etag', self.http_etag)
            self.http_last_modified = result.feed.headers.get('last-modified', self.http_last_modified)
        self.save()
    except Exception as e:
        # "except ... as e" is valid on Python 2.6+ and required on
        # Python 3 (the old "except Exception, e" form is a syntax
        # error there)
        # "Not really" errors
        #feedcore.AuthenticationRequired
        # Temporary errors
        #feedcore.BadRequest
        #feedcore.InternalServerError
        # Permanent errors
        #feedcore.Unsubscribe
        #feedcore.InvalidFeed
        #feedcore.UnknownStatusCode
        gpodder.user_extensions.on_podcast_update_failed(self, e)
        raise

    gpodder.user_extensions.on_podcast_updated(self)

    # Re-determine the common prefix for all episodes
    self._determine_common_prefix()
def delete(self):
    """Remove this podcast from the database and from the model's list."""
    self.db.delete_podcast(self)
    self.model._remove_podcast(self)
def save(self):
    """Persist this podcast, assigning a download folder if none is set."""
    # get_save_dir() picks and stores a unique folder name as a side effect
    if self.download_folder is None:
        self.get_save_dir()

    gpodder.user_extensions.on_podcast_save(self)

    self._clear_changes()

    self.db.save_podcast(self)
    self.model._append_podcast(self)
def get_statistics(self):
    """Return the five-element episode statistics tuple for this podcast.

    A podcast without a database ID yet has no stored episodes, so all
    five counters are zero in that case.
    """
    if self.id is None:
        return (0, 0, 0, 0, 0)
    return self.db.get_podcast_statistics(self.id)
1212 if not self
.section
:
1213 self
.section
= self
._get
_content
_type
()
1218 def _get_content_type(self
):
1219 if 'youtube.com' in self
.url
or 'vimeo.com' in self
.url
:
1222 audio
, video
, other
= 0, 0, 0
1223 for content_type
in self
.db
.get_content_types(self
.id):
1224 content_type
= content_type
.lower()
1225 if content_type
.startswith('audio'):
1227 elif content_type
.startswith('video'):
def authenticate_url(self, url):
    """Return url with this podcast's stored credentials embedded."""
    username = self.auth_username
    password = self.auth_password
    return util.url_add_authentication(url, username, password)
def _get_cover_url(self):
    """Getter backing the read-only 'image' property (self.cover_url)."""
    return self.cover_url

# Read-only alias so callers can use podcast.image
image = property(_get_cover_url)
def rename(self, new_title):
    """Change the podcast title and move its download folder to match.

    No-op when the stripped title is unchanged. When a folder change is
    needed, the old folder is either renamed in place or - if the new
    folder already exists - its files are moved over and the old folder
    is deleted.
    """
    new_title = new_title.strip()
    if self.title == new_title:
        return

    new_folder_name = self.find_unique_folder_name(new_title)
    if new_folder_name and new_folder_name != self.download_folder:
        new_folder = os.path.join(gpodder.downloads, new_folder_name)
        old_folder = os.path.join(gpodder.downloads, self.download_folder)
        if os.path.exists(old_folder):
            if not os.path.exists(new_folder):
                # Old folder exists, new folder does not -> simply rename
                logger.info('Renaming %s => %s', old_folder, new_folder)
                os.rename(old_folder, new_folder)
            else:
                # Both folders exist -> move files and delete old folder
                logger.info('Moving files from %s to %s', old_folder,
                        new_folder)
                # "filename" instead of "file" to avoid shadowing the builtin
                for filename in glob.glob(os.path.join(old_folder, '*')):
                    shutil.move(filename, new_folder)
                logger.info('Removing %s', old_folder)
                shutil.rmtree(old_folder, ignore_errors=True)
        self.download_folder = new_folder_name

    self.title = new_title
    self.save()
def get_downloaded_episodes(self):
    """Return the list of episodes that report was_downloaded() as true."""
    return [episode for episode in self.get_all_episodes()
            if episode.was_downloaded()]
def _determine_common_prefix(self):
    """Store the title prefix shared by all episodes in _common_prefix."""
    # We need at least 2 episodes for the prefix to be "common" ;)
    if len(self.children) < 2:
        self._common_prefix = ''
        return

    titles = [episode.title for episode in self.children]
    prefix = os.path.commonprefix(titles)

    # Cut back to the last space so we never chop a word in half
    # (if there is no space at all, rfind gives -1 and the slice is '')
    if prefix and not prefix.endswith(' '):
        prefix = prefix[:prefix.rfind(' ') + 1]

    self._common_prefix = prefix
def get_all_episodes(self):
    """Return the episode list, loading it from the database on first use."""
    if self.children is None:
        # Lazy load; also recompute the shared title prefix once loaded
        self.children = self.db.load_episodes(self, self.episode_factory)
        self._determine_common_prefix()

    return self.children
def find_unique_folder_name(self, download_folder):
    """Return a variant of download_folder that no other podcast uses.

    The podcast's own current folder counts as available, so renaming
    to the same name is stable.
    """
    # Remove trailing dots to avoid errors on Windows (bug 600)
    # Also remove leading dots to avoid hidden folders on Linux
    download_folder = download_folder.strip('.' + string.whitespace)

    for folder_name in util.generate_names(download_folder):
        taken = self.db.podcast_download_folder_exists(folder_name)
        if not taken or self.download_folder == folder_name:
            return folder_name
def get_save_dir(self, force_new=False):
    """Return the absolute download directory, creating it if missing.

    When no folder name has been assigned yet (or force_new is True),
    a unique name is derived from the title (or URL) and persisted.
    """
    if self.download_folder is None or force_new:
        # we must change the folder name, because it has not been set manually
        fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)
        if not fn_template:
            # Title sanitized to nothing - fall back to the URL
            fn_template = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)

        # Find a unique folder name for this podcast
        download_folder = self.find_unique_folder_name(fn_template)

        # Try removing the download folder if it has been created previously
        if self.download_folder is not None:
            folder = os.path.join(gpodder.downloads, self.download_folder)
            try:
                os.rmdir(folder)
            except OSError:
                # Not empty (or gone) - leave the old folder in place
                logger.info('Old download folder is kept for %s', self.url)

        logger.info('Updating download_folder of %s to %s', self.url,
                download_folder)
        self.download_folder = download_folder
        self.save()

    save_dir = os.path.join(gpodder.downloads, self.download_folder)

    # Avoid encoding errors for OS-specific functions (bug 1570)
    save_dir = util.sanitize_encoding(save_dir)

    # Create save_dir if it does not yet exist
    if not util.make_directory(save_dir):
        logger.error('Could not create save_dir: %s', save_dir)

    return save_dir

save_dir = property(fget=get_save_dir)
def remove_downloaded(self):
    """Delete the podcast's download directory, notifying extensions first."""
    # Give extensions a chance to act on each downloaded file before
    # the whole directory disappears
    for episode in self.get_downloaded_episodes():
        filename = episode.local_filename(create=False, check_only=True)
        if filename is not None:
            gpodder.user_extensions.on_episode_delete(episode, filename)

    # Remove the download directory (second argument: ignore_errors)
    shutil.rmtree(self.save_dir, True)
@property
def cover_file(self):
    """Path of the cover art file ('folder') inside the save directory."""
    return os.path.join(self.save_dir, 'folder')
class Model(object):
    """Container managing the application's list of podcasts."""

    # Factory class used when loading or creating podcasts; subclasses
    # may override this with a PodcastChannel subclass
    PodcastClass = PodcastChannel

    def __init__(self, db):
        self.db = db
        # Lazily-loaded list of podcasts (None until first access)
        self.children = None

    def _append_podcast(self, podcast):
        """Track a newly-saved podcast (no-op if already listed)."""
        if podcast not in self.children:
            self.children.append(podcast)

    def _remove_podcast(self, podcast):
        """Forget a deleted podcast and notify extensions."""
        self.children.remove(podcast)
        gpodder.user_extensions.on_podcast_delete(self)

    def get_podcasts(self):
        """Return all podcasts, loading them from the database on first use."""
        def podcast_factory(dct, db):
            return self.PodcastClass.create_from_dict(dct, self)

        if self.children is None:
            self.children = self.db.load_podcasts(podcast_factory)

            # Check download folders for changes (bug 902)
            for podcast in self.children:
                podcast.check_download_folder()

        return self.children

    def load_podcast(self, url, create=True, authentication_tokens=None,
            max_episodes=0):
        """Load (or, if create is True, subscribe to) the podcast at url."""
        return self.PodcastClass.load(self, url, create,
                authentication_tokens,
                max_episodes)

    @classmethod
    def podcast_sort_key(cls, podcast):
        return cls.PodcastClass.sort_key(podcast)

    @classmethod
    def episode_sort_key(cls, episode):
        return episode.published

    @classmethod
    def sort_episodes_by_pubdate(cls, episodes, reverse=False):
        """Sort a list of PodcastEpisode objects chronologically

        Returns an iterable, sorted sequence of the episodes
        """
        return sorted(episodes, key=cls.episode_sort_key, reverse=reverse)