1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2018 The gPodder Team
5 # Copyright (c) 2011 Neal H. Walfield
7 # gPodder is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # gPodder is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
23 # gpodder.model - Core model classes for gPodder (2009-08-13)
24 # Based on libpodcasts.py (thp, 2005-10-29)
41 from gpodder
import coverart
, feedcore
, registry
, schema
, util
, vimeo
, youtube
43 logger
= logging
.getLogger(__name__
)
49 """ abstract class for presenting a parsed feed to PodcastChannel """
52 """ :return str: the feed's title """
56 """ :return str: link to the feed's website """
59 def get_description(self
):
60 """ :return str: feed's textual description """
63 def get_cover_url(self
):
64 """ :return str: url of the feed's cover image """
67 def get_payment_url(self
):
68 """ :return str: optional -- feed's payment url """
71 def get_http_etag(self
):
72 """ :return str: optional -- last HTTP etag header, for conditional request next time """
75 def get_http_last_modified(self
):
76 """ :return str: optional -- last HTTP Last-Modified header, for conditional request next time """
79 def get_new_episodes(self
, channel
, existing_guids
):
81 Produce new episodes and update old ones.
82 Feed is a class to present results, so the feed shall have already been fetched.
83 Existing episodes not in all_seen_guids will be purged from the database.
84 :param PodcastChannel channel: the updated channel
85 :param dict(str, PodcastEpisode): existing episodes, by guid
86 :return (list(PodcastEpisode), set(str)): new_episodes, all_seen_guids
90 def get_next_page(self
, channel
, max_episodes
):
92 Paginated feed support (RFC 5005).
93 If the feed is paged, return the next feed page.
94 Returned page will in turn be asked for the next page, until None is returned.
95 :return feedcore.Result: the next feed's page,
96 as a fully parsed Feed or None
class PodcastParserFeed(Feed):
    """Feed implementation backed by a podcastparser result dict."""

    def __init__(self, feed, fetcher, max_episodes=0):
        # parsed feed dict from podcastparser; read by all accessors
        self.feed = feed
        self.fetcher = fetcher
        self.max_episodes = max_episodes

    def get_title(self):
        return self.feed.get('title')
def get_link(self):
    """Return the feed's website link; for YouTube feeds, the channel URL."""
    vid = youtube.get_youtube_id(self.feed['url'])
    if vid is not None:
        # YouTube feeds carry a per-video link; replace with the channel URL
        self.feed['link'] = youtube.get_channel_id_url(self.feed['url'], self.fetcher.feed_data)
    return self.feed.get('link')
def get_description(self):
    """Return the feed's textual description; YouTube channels get their channel description."""
    vid = youtube.get_youtube_id(self.feed['url'])
    if vid is not None:
        self.feed['description'] = youtube.get_channel_desc(self.feed['url'], self.fetcher.feed_data)
    return self.feed.get('description')
def get_cover_url(self):
    """Return the feed's cover image URL, or None if the feed has none."""
    cover = self.feed.get('cover_url')
    return cover
def get_payment_url(self):
    """Return the feed's optional payment URL, or None."""
    payment = self.feed.get('payment_url')
    return payment
def get_http_etag(self):
    """Return the last HTTP ETag header, for conditional requests; None if absent."""
    headers = self.feed.get('headers', {})
    return headers.get('etag')
def get_http_last_modified(self):
    """Return the last HTTP Last-Modified header, for conditional requests; None if absent."""
    headers = self.feed.get('headers', {})
    return headers.get('last-modified')
def get_new_episodes(self, channel, existing_guids):
    """Produce new episodes and update old ones.

    :param PodcastChannel channel: the channel being updated
    :param dict existing_guids: existing episodes, keyed by GUID
    :return tuple: (list of new PodcastEpisode, set of all seen GUIDs)
    """
    # Keep track of episode GUIDs currently seen in the feed
    seen_guids = set()

    # list of new episodes
    new_episodes = []

    # We have to sort the entries in descending chronological order,
    # because if the feed lists items in ascending order and has >
    # max_episodes old episodes, new episodes will not be shown.
    # See also: gPodder Bug 1186
    entries = sorted(self.feed.get('episodes', []),
                     key=lambda e: e['published'], reverse=True)

    # We can limit the maximum number of entries that gPodder will parse
    if self.max_episodes > 0 and len(entries) > self.max_episodes:
        entries = entries[:self.max_episodes]

    num_duplicate_guids = 0

    # Search all entries for new episodes
    for entry in entries:
        episode = channel.EpisodeClass.from_podcastparser_entry(entry, channel)
        if episode is None:
            continue

        # Discard episode when its GUID collides with a newer episode
        if episode.guid in seen_guids:
            num_duplicate_guids += 1
            channel._update_error = ('Discarded {} episode(s) with non-unique GUID, contact the podcast publisher to fix this issue.'
                                     .format(num_duplicate_guids))
            # logger.warn() is a deprecated alias since Python 3.3 -- use warning()
            logger.warning('Discarded episode with non-unique GUID, contact the podcast publisher to fix this issue. [%s] [%s]',
                           channel.title, episode.title)
            continue

        seen_guids.add(episode.guid)
        # Detect (and update) existing episode based on GUIDs
        existing_episode = existing_guids.get(episode.guid, None)
        if existing_episode:
            if existing_episode.total_time == 0 and 'youtube' in episode.url:
                # query duration for existing youtube episodes that haven't been downloaded or queried
                # such as live streams after they have ended
                existing_episode.total_time = youtube.get_total_time(episode)

            existing_episode.update_from(episode)
            existing_episode.save()
            continue
        elif episode.total_time == 0 and 'youtube' in episode.url:
            # query duration for new youtube episodes
            episode.total_time = youtube.get_total_time(episode)

        episode.save()
        new_episodes.append(episode)
    return new_episodes, seen_guids
def get_next_page(self, channel, max_episodes):
    """Paginated feed support (RFC 5005): fetch the next page, or return None."""
    if 'paged_feed_next' not in self.feed:
        return None
    next_url = self.feed['paged_feed_next']
    logger.debug("get_next_page: feed has next %s", next_url)
    next_url = channel.authenticate_url(next_url)
    return self.fetcher.fetch(next_url, autodiscovery=False, max_episodes=max_episodes)
class gPodderFetcher(feedcore.Fetcher):
    """
    This class implements fetching a channel from custom feed handlers
    or the default using podcastparser
    """

    def fetch_channel(self, channel, max_episodes):
        # A registered custom handler (e.g. SoundCloud) wins over podcastparser
        custom_feed = registry.feed_handler.resolve(channel, None, max_episodes)
        if custom_feed is not None:
            return custom_feed
        # TODO: revisit authenticate_url: pass auth as kwarg
        # If we have a username or password, rebuild the url with them included
        # Note: using a HTTPBasicAuthHandler would be pain because we need to
        # know the realm. It can be done, but I think this method works, too
        url = channel.authenticate_url(channel.url)
        return self.fetch(url, channel.http_etag, channel.http_last_modified,
                          max_episodes=max_episodes)

    def _resolve_url(self, url):
        # Map YouTube/Vimeo page URLs to their real feed URLs
        url = youtube.get_real_channel_url(url)
        url = vimeo.get_real_channel_url(url)
        return url

    def parse_feed(self, url, feed_data, data_stream, headers, status, max_episodes=0, **kwargs):
        self.feed_data = feed_data
        try:
            feed = podcastparser.parse(url, data_stream)
            feed['url'] = url
            feed['headers'] = headers
            return feedcore.Result(status, PodcastParserFeed(feed, self, max_episodes))
        except ValueError as e:
            raise feedcore.InvalidFeed('Could not parse feed: {url}: {msg}'.format(url=url, msg=e))
231 # database -> podcast -> episode -> download/playback
232 # podcast.parent == db
233 # podcast.children == [episode, ...]
234 # episode.parent == podcast
236 # - normally: episode.children = (None, None)
237 # - downloading: episode.children = (DownloadTask(), None)
238 # - playback: episode.children = (None, PlaybackTask())
class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """
    __slots__ = ('id', 'parent', 'children')

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        o = cls(*args)

        # XXX: all(map(lambda k: hasattr(o, k), d))?
        for k, v in d.items():
            setattr(o, k, v)

        return o
class PodcastEpisode(PodcastModelObject):
    """holds data for one object in a channel"""
    # In theory, Linux can have 255 bytes (not characters!) in a filename, but
    # filesystems like eCryptFS store metadata in the filename, making the
    # effective number of characters less than that. eCryptFS recommends
    # 140 chars, we use 120 here (140 - len(extension) - len(".partial.webm"))
    # (youtube-dl appends an extension after .partial, ".webm" is the longest).
    # References: gPodder bug 1898, http://unix.stackexchange.com/a/32834
    MAX_FILENAME_LENGTH = 120  # without extension
    MAX_FILENAME_WITH_EXT_LENGTH = 140 - len(".partial.webm")  # with extension

    __slots__ = schema.EpisodeColumns + ('_download_error',)
276 def _deprecated(self
):
277 raise Exception('Property is deprecated!')
279 is_played
= property(fget
=_deprecated
, fset
=_deprecated
)
280 is_locked
= property(fget
=_deprecated
, fset
=_deprecated
)
def has_website_link(self):
    """True if the episode has a website link distinct from its media URL (or a video link)."""
    if not self.link:
        return False
    return (self.link != self.url or
            youtube.is_video_link(self.link))
287 def from_podcastparser_entry(cls
, entry
, channel
):
288 episode
= cls(channel
)
289 episode
.guid
= entry
['guid']
290 episode
.title
= entry
['title']
291 episode
.link
= entry
['link']
292 episode
.description
= entry
['description']
293 if entry
.get('description_html'):
294 episode
.description_html
= entry
['description_html']
296 thumbnail
= entry
.get('episode_art_url')
297 description
= util
.remove_html_tags(episode
.description
or _('No description available'))
298 episode
.description_html
= util
.nice_html_description(thumbnail
, description
)
300 episode
.total_time
= entry
['total_time']
301 episode
.published
= entry
['published']
302 episode
.payment_url
= entry
['payment_url']
304 audio_available
= any(enclosure
['mime_type'].startswith('audio/') for enclosure
in entry
['enclosures'])
305 video_available
= any(enclosure
['mime_type'].startswith('video/') for enclosure
in entry
['enclosures'])
306 link_has_media
= False
307 if not (audio_available
or video_available
):
309 episode
.url
= util
.normalize_feed_url(entry
['link'])
310 # Check if any extensions (e.g. youtube-dl) support the link
311 link_has_media
= registry
.custom_downloader
.resolve(None, None, episode
) is not None
313 media_available
= audio_available
or video_available
or link_has_media
315 for enclosure
in entry
['enclosures']:
316 episode
.mime_type
= enclosure
['mime_type']
318 # Skip images in feeds if audio or video is available (bug 979)
319 # This must (and does) also look in Media RSS enclosures (bug 1430)
320 if episode
.mime_type
.startswith('image/') and media_available
:
323 # If we have audio or video available later on, skip
324 # all 'application/*' data types (fixes Linux Outlaws and peertube feeds)
325 if episode
.mime_type
.startswith('application/') and media_available
:
328 episode
.url
= util
.normalize_feed_url(enclosure
['url'])
332 episode
.file_size
= enclosure
['file_size']
335 # Brute-force detection of the episode link
336 episode
.url
= util
.normalize_feed_url(entry
['link'])
340 if any(mod
.is_video_link(episode
.url
) for mod
in (youtube
, vimeo
)):
343 # Check if we can resolve this link to a audio/video file
344 filename
, extension
= util
.filename_from_url(episode
.url
)
345 file_type
= util
.file_type_by_extension(extension
)
347 # The link points to a audio or video file - use it!
348 if file_type
is not None:
356 def __init__(self
, channel
):
357 self
.parent
= channel
358 self
.podcast_id
= self
.parent
.id
359 self
.children
= (None, None)
365 self
.mime_type
= 'application/octet-stream'
367 self
.description
= ''
368 self
.description_html
= ''
371 self
.download_filename
= None
372 self
.payment_url
= None
374 self
.state
= gpodder
.STATE_NORMAL
376 self
.archive
= channel
.auto_archive_episodes
380 self
.current_position
= 0
381 self
.current_position_updated
= 0
383 # Timestamp of last playback time
384 self
.last_playback
= 0
386 self
._download
_error
= None
@property
def db(self):
    """Shortcut to the model's database (episode -> channel -> model)."""
    return self.parent.parent.db
@property
def trimmed_title(self):
    """Return the title with the common prefix trimmed"""
    # Minimum amount of leftover characters after trimming. This
    # avoids things like "Common prefix 123" to become just "123".
    # If there are LEFTOVER_MIN or less characters after trimming,
    # the original title will be returned without trimming.
    LEFTOVER_MIN = 5

    # "Podcast Name - Title" and "Podcast Name: Title" -> "Title"
    for postfix in (' - ', ': '):
        prefix = self.parent.title + postfix
        if (self.title.startswith(prefix) and
                len(self.title) - len(prefix) > LEFTOVER_MIN):
            return self.title[len(prefix):]

    regex_patterns = [
        # "Podcast Name <number>: ..." -> "<number>: ..."
        r'^%s (\d+: .*)' % re.escape(self.parent.title),
        # "Episode <number>: ..." -> "<number>: ..."
        r'Episode (\d+: .*)',
    ]

    for pattern in regex_patterns:
        if re.match(pattern, self.title):
            title = re.sub(pattern, r'\1', self.title)
            if len(title) > LEFTOVER_MIN:
                return title

    # "#001: Title" -> "001: Title"
    if (
            not self.parent._common_prefix and
            re.match(r'^#\d+: ', self.title) and
            len(self.title) - 1 > LEFTOVER_MIN):
        return self.title[1:]

    if (self.parent._common_prefix is not None and
            self.title.startswith(self.parent._common_prefix) and
            len(self.title) - len(self.parent._common_prefix) > LEFTOVER_MIN):
        return self.title[len(self.parent._common_prefix):]

    return self.title
440 def _set_download_task(self
, download_task
):
441 self
.children
= (download_task
, self
.children
[1])
443 def _get_download_task(self
):
444 return self
.children
[0]
446 download_task
= property(_get_download_task
, _set_download_task
)
@property
def downloading(self):
    """True while this episode's download task is active or pending."""
    task = self.download_task
    if task is None:
        return False
    return task.status in (task.DOWNLOADING, task.QUEUED, task.PAUSING, task.PAUSED, task.CANCELLING)
def get_player(self, config):
    """Return the configured player command for this episode's file type, or 'default'."""
    file_type = self.file_type()
    if file_type == 'video' and config.player.video and config.player.video != 'default':
        player = config.player.video
    elif file_type == 'audio' and config.player.audio and config.player.audio != 'default':
        player = config.player.audio
    else:
        player = 'default'
    return player
def can_play(self, config):
    """
    gPodder.playback_episodes() filters selection with this method.
    """
    return self.was_downloaded(and_exists=True) or self.can_stream(config)
def can_stream(self, config):
    """
    Don't try streaming if the user has not defined a player
    or else we would probably open the browser when giving a URL to xdg-open.
    We look at the audio or video player depending on its file type.
    """
    chosen = self.get_player(config)
    if not chosen:
        return chosen
    return chosen != 'default'
def can_download(self):
    """
    gPodder.on_download_selected_episodes() filters selection with this method.
    PAUSING and PAUSED tasks can be resumed.
    """
    if self.was_downloaded(and_exists=True):
        return False
    task = self.download_task
    return (not task
            or task.status in (task.PAUSING, task.PAUSED, task.FAILED))
def can_pause(self):
    """
    gPodder.on_pause_selected_episodes() filters selection with this method.
    """
    task = self.download_task
    return task and task.status in (task.QUEUED, task.DOWNLOADING)
def can_cancel(self):
    """
    DownloadTask.cancel() only cancels the following tasks.
    """
    task = self.download_task
    return task and task.status in \
        (task.DOWNLOADING, task.QUEUED, task.PAUSED, task.FAILED)
def can_delete(self):
    """
    gPodder.delete_episode_list() filters out locked episodes, and cancels all unlocked tasks in selection.
    """
    task = self.download_task
    return self.state != gpodder.STATE_DELETED and not self.archive and (
        not task or task.status == task.FAILED)
def check_is_new(self):
    """True for a normal-state, new episode that is not currently downloading."""
    return (self.state == gpodder.STATE_NORMAL and self.is_new and
            not self.downloading)
515 gpodder
.user_extensions
.on_episode_save(self
)
516 self
.db
.save_episode(self
)
518 def on_downloaded(self
, filename
):
519 self
.state
= gpodder
.STATE_DOWNLOADED
521 self
.file_size
= os
.path
.getsize(filename
)
524 def set_state(self
, state
):
528 def playback_mark(self
):
530 self
.last_playback
= int(time
.time())
531 gpodder
.user_extensions
.on_episode_playback(self
)
def mark(self, state=None, is_played=None, is_locked=None):
    """Update state / played / locked flags (only those given) and save."""
    if state is not None:
        self.state = state
    if is_played is not None:
        self.is_new = not is_played

        # "Mark as new" must "undelete" the episode
        if self.is_new and self.state == gpodder.STATE_DELETED:
            self.state = gpodder.STATE_NORMAL
    if is_locked is not None:
        self.archive = is_locked
    self.save()
547 def age_in_days(self
):
548 return util
.file_age_in_days(self
.local_filename(create
=False,
551 age_int_prop
= property(fget
=age_in_days
)
553 def get_age_string(self
):
554 return util
.file_age_to_string(self
.age_in_days())
556 age_prop
= property(fget
=get_age_string
)
def one_line_description(self):
    """Return a single-line, HTML-free, length-capped description."""
    MAX_LINE_LENGTH = 120
    desc = util.remove_html_tags(self.description or '')
    desc = re.sub(r'\s+', ' ', desc).strip()
    if not desc:
        return _('No description available')

    # Decode the description to avoid gPodder bug 1277
    desc = util.convert_bytes(desc).strip()

    if len(desc) > MAX_LINE_LENGTH:
        return desc[:MAX_LINE_LENGTH] + '...'
    return desc
def delete_from_disk(self):
    """Delete the downloaded file (if any) and mark the episode as deleted."""
    filename = self.local_filename(create=False, check_only=True)
    if filename is not None:
        # Let extensions react before the file disappears
        gpodder.user_extensions.on_episode_delete(self, filename)
        util.delete_file(filename)

    self._download_error = None
    self.set_state(gpodder.STATE_DELETED)
def get_playback_url(self, config=None, allow_partial=False):
    """Local (or remote) playback/streaming filename/URL

    Returns either the local filename or a streaming URL that
    can be used to playback this episode.

    Also returns the filename of a partially downloaded file
    in case partial (preview) playback is desired.
    """
    url = self.local_filename(create=False)

    if (allow_partial and url is not None and
            os.path.exists(url + '.partial')):
        return url + '.partial'

    if url is None or not os.path.exists(url):
        # FIXME: may custom downloaders provide the real url ?
        url = registry.download_url.resolve(config, self.url, self, allow_partial)
    return url
def find_unique_file_name(self, filename, extension):
    """Return a filename (with extension) not already used by another episode of this podcast."""
    # Remove leading and trailing whitespace + dots (to avoid hidden files)
    filename = filename.strip('.' + string.whitespace) + extension

    for name in util.generate_names(filename):
        if (not self.db.episode_filename_exists(self.podcast_id, name) or
                self.download_filename == name):
            return name
611 def local_filename(self
, create
, force_update
=False, check_only
=False,
612 template
=None, return_wanted_filename
=False):
613 """Get (and possibly generate) the local saving filename
615 Pass create=True if you want this function to generate a
616 new filename if none exists. You only want to do this when
617 planning to create/download the file after calling this function.
619 Normally, you should pass create=False. This will only
620 create a filename when the file already exists from a previous
621 version of gPodder (where we used md5 filenames). If the file
622 does not exist (and the filename also does not exist), this
623 function will return None.
625 If you pass force_update=True to this function, it will try to
626 find a new (better) filename and move the current file if this
627 is the case. This is useful if (during the download) you get
628 more information about the file, e.g. the mimetype and you want
629 to include this information in the file name generation process.
631 If check_only=True is passed to this function, it will never try
632 to rename the file, even if would be a good idea. Use this if you
633 only want to check if a file exists.
635 If "template" is specified, it should be a filename that is to
636 be used as a template for generating the "real" filename.
638 The generated filename is stored in the database for future access.
640 If return_wanted_filename is True, the filename will not be written to
641 the database, but simply returned by this function (for use by the
642 "import external downloads" feature).
644 if self
.download_filename
is None and (check_only
or not create
):
647 ext
= self
.extension(may_call_local_filename
=False)
649 if not check_only
and (force_update
or not self
.download_filename
):
650 # Avoid and catch gPodder bug 1440 and similar situations
652 logger
.warn('Empty template. Report this podcast URL %s',
656 # Try to find a new filename for the current file
657 if template
is not None:
658 # If template is specified, trust the template's extension
659 episode_filename
, ext
= os
.path
.splitext(template
)
661 episode_filename
, _
= util
.filename_from_url(self
.url
)
663 if 'redirect' in episode_filename
and template
is None:
664 # This looks like a redirection URL - force URL resolving!
665 logger
.warn('Looks like a redirection to me: %s', self
.url
)
666 url
= util
.get_real_url(self
.channel
.authenticate_url(self
.url
))
667 logger
.info('Redirection resolved to: %s', url
)
668 episode_filename
, _
= util
.filename_from_url(url
)
670 # Use title for YouTube, Vimeo and Soundcloud downloads
671 if (youtube
.is_video_link(self
.url
) or
672 vimeo
.is_video_link(self
.url
) or
673 episode_filename
== 'stream'):
674 episode_filename
= self
.title
676 # If the basename is empty, use the md5 hexdigest of the URL
677 if not episode_filename
or episode_filename
.startswith('redirect.'):
678 logger
.error('Report this feed: Podcast %s, episode %s',
679 self
.channel
.url
, self
.url
)
680 episode_filename
= hashlib
.md5(self
.url
.encode('utf-8')).hexdigest()
682 # Also sanitize ext (see #591 where ext=.mp3?dest-id=754182)
683 fn_template
, ext
= util
.sanitize_filename_ext(
686 self
.MAX_FILENAME_LENGTH
,
687 self
.MAX_FILENAME_WITH_EXT_LENGTH
)
688 # Find a unique filename for this episode
689 wanted_filename
= self
.find_unique_file_name(fn_template
, ext
)
691 if return_wanted_filename
:
692 # return the calculated filename without updating the database
693 return wanted_filename
695 # The old file exists, but we have decided to want a different filename
696 if self
.download_filename
and wanted_filename
!= self
.download_filename
:
697 # there might be an old download folder crawling around - move it!
698 new_file_name
= os
.path
.join(self
.channel
.save_dir
, wanted_filename
)
699 old_file_name
= os
.path
.join(self
.channel
.save_dir
, self
.download_filename
)
700 if os
.path
.exists(old_file_name
) and not os
.path
.exists(new_file_name
):
701 logger
.info('Renaming %s => %s', old_file_name
, new_file_name
)
702 os
.rename(old_file_name
, new_file_name
)
703 elif force_update
and not os
.path
.exists(old_file_name
):
704 # When we call force_update, the file might not yet exist when we
705 # call it from the downloading code before saving the file
706 logger
.info('Choosing new filename: %s', new_file_name
)
708 logger
.warn('%s exists or %s does not', new_file_name
, old_file_name
)
709 logger
.info('Updating filename of %s to "%s".', self
.url
, wanted_filename
)
710 elif self
.download_filename
is None:
711 logger
.info('Setting download filename: %s', wanted_filename
)
712 self
.download_filename
= wanted_filename
715 if return_wanted_filename
:
716 # return the filename, not full path
717 return self
.download_filename
718 return os
.path
.join(self
.channel
.save_dir
, self
.download_filename
)
def extension(self, may_call_local_filename=True):
    """Return the media file extension (from URL, local file, or MIME type fallback)."""
    filename, ext = util.filename_from_url(self.url)
    if may_call_local_filename:
        filename = self.local_filename(create=False)
        if filename is not None:
            filename, ext = os.path.splitext(filename)
    # if we can't detect the extension from the url fallback on the mimetype
    if ext == '' or util.file_type_by_extension(ext) is None:
        ext = util.extension_from_mimetype(self.mime_type)
    return ext
def file_exists(self):
    """True if the episode's local file name is known and the file exists on disk."""
    filename = self.local_filename(create=False, check_only=True)
    if filename is None:
        return False
    return os.path.exists(filename)
def was_downloaded(self, and_exists=False):
    """True if the episode is in DOWNLOADED state (and the file exists, if requested)."""
    if self.state != gpodder.STATE_DOWNLOADED:
        return False
    if and_exists and not self.file_exists():
        return False
    return True
def sync_filename(self, use_custom=False, custom_format=None):
    """Return the filename used on a media player: custom format or the episode title."""
    if use_custom:
        return util.object_string_formatter(custom_format,
                                            episode=self, podcast=self.channel)
    return self.title
def file_type(self):
    """Return the media type ('audio', 'video', ...) for this episode."""
    # Assume all YouTube/Vimeo links are video files
    if youtube.is_video_link(self.url) or vimeo.is_video_link(self.url):
        return 'video'

    return util.file_type_by_extension(self.extension())
@property
def basename(self):
    """URL basename without its extension."""
    return os.path.splitext(os.path.basename(self.url))[0]
@property
def pubtime(self):
    """
    Returns published time as HHMM (or 0000 if not available)
    """
    try:
        return datetime.datetime.fromtimestamp(self.published).strftime('%H%M')
    except Exception:
        # narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt pass
        # through; logger.warn() is deprecated in favour of warning()
        logger.warning('Cannot format pubtime: %s', self.title, exc_info=True)
        return '0000'
def playlist_title(self):
    """Return a title for this episode in a playlist

    The title will be composed of the podcast name, the
    episode name and the publication date. The return
    value is the canonical representation of this episode
    in playlists (for example, M3U playlists).
    """
    return '%s - %s (%s)' % (self.channel.title,
                             self.title,
                             self.cute_pubdate())
def cute_pubdate(self):
    """Return a human-readable publication date, or '(unknown)'."""
    result = util.format_date(self.published)
    if result is None:
        return '(%s)' % _('unknown')

    return result

pubdate_prop = property(fget=cute_pubdate)
def published_datetime(self):
    """Return the published timestamp as a local datetime object."""
    return datetime.datetime.fromtimestamp(self.published)

@property
def sortdate(self):
    """Published date formatted for sorting (YYYY-MM-DD)."""
    return self.published_datetime().strftime('%Y-%m-%d')

@property
def pubdate_day(self):
    return self.published_datetime().strftime('%d')

@property
def pubdate_month(self):
    return self.published_datetime().strftime('%m')

@property
def pubdate_year(self):
    return self.published_datetime().strftime('%y')
def is_finished(self):
    """Return True if this episode is considered "finished playing"

    An episode is considered "finished" when there is a
    current position mark on the track, and when the
    current position is greater than 99 percent of the
    total time or inside the last 10 seconds of a track.
    """
    return (self.current_position > 0 and self.total_time > 0 and
            (self.current_position + 10 >= self.total_time or
             self.current_position >= self.total_time * .99))
def get_play_info_string(self, duration_only=False):
    """Return a short status string: duration, 'Finished (d)', or 'position / duration'."""
    duration = util.format_time(self.total_time)
    if duration_only and self.total_time > 0:
        return duration
    elif self.is_finished():
        return '%s (%s)' % (_('Finished'), duration)
    elif self.current_position > 0 and \
            self.current_position != self.total_time:
        position = util.format_time(self.current_position)
        return '%s / %s' % (position, duration)
    elif self.total_time > 0:
        return duration
    else:
        return '-'
def update_from(self, episode):
    """Copy feed-supplied metadata from *episode* onto this episode."""
    for k in ('title', 'url', 'description', 'description_html', 'link', 'published', 'guid', 'payment_url'):
        setattr(self, k, getattr(episode, k))
    # Don't overwrite file size on downloaded episodes
    # See #648 refreshing a youtube podcast clears downloaded file size
    if self.state != gpodder.STATE_DOWNLOADED:
        # direct attribute access instead of setattr/getattr with a literal name
        self.file_size = episode.file_size
class PodcastChannel(PodcastModelObject):
    __slots__ = schema.PodcastColumns + ('_common_prefix', '_update_error',)

    UNICODE_TRANSLATE = {ord('ö'): 'o', ord('ä'): 'a', ord('ü'): 'u'}

    # Enumerations for download strategy
    STRATEGY_DEFAULT, STRATEGY_LATEST = list(range(2))

    # Description and ordering of strategies
    STRATEGIES = [
        (STRATEGY_DEFAULT, _('Default')),
        (STRATEGY_LATEST, _('Only keep latest')),
    ]

    MAX_FOLDERNAME_LENGTH = 60
    SECONDS_PER_DAY = 24 * 60 * 60
    SECONDS_PER_WEEK = 7 * 24 * 60 * 60
    EpisodeClass = PodcastEpisode

    feed_fetcher = gPodderFetcher()
879 def __init__(self
, model
, id=None):
887 self
.description
= ''
888 self
.cover_url
= None
889 self
.payment_url
= None
891 self
.auth_username
= ''
892 self
.auth_password
= ''
894 self
.http_last_modified
= None
895 self
.http_etag
= None
897 self
.auto_archive_episodes
= False
898 self
.download_folder
= None
899 self
.pause_subscription
= False
900 self
.sync_to_mp3_player
= True
901 self
.cover_thumb
= None
903 self
.section
= _('Other')
904 self
._common
_prefix
= None
905 self
.download_strategy
= PodcastChannel
.STRATEGY_DEFAULT
908 self
.children
= self
.db
.load_episodes(self
, self
.episode_factory
)
909 self
._determine
_common
_prefix
()
911 self
._update
_error
= None
@property
def db(self):
    """Shortcut to the model's database (channel -> model)."""
    return self.parent.db
def get_download_strategies(self):
    """Yield (selected, value, caption) for every available download strategy."""
    for value, caption in PodcastChannel.STRATEGIES:
        yield self.download_strategy == value, value, caption
def set_download_strategy(self, download_strategy):
    """Switch to *download_strategy* if it is one of the known STRATEGIES values."""
    if download_strategy == self.download_strategy:
        return

    caption = dict(self.STRATEGIES).get(download_strategy)
    if caption is not None:
        logger.debug('Strategy for %s changed to %s', self.title, caption)
        self.download_strategy = download_strategy
    else:
        # logger.warn() is deprecated; warning() is the supported spelling
        logger.warning('Cannot set strategy to %d', download_strategy)
936 def rewrite_url(self
, new_url
):
937 new_url
= util
.normalize_feed_url(new_url
)
942 self
.http_etag
= None
943 self
.http_last_modified
= None
947 def check_download_folder(self
):
948 """Check the download folder for externally-downloaded files
950 This will try to assign downloaded files with episodes in the
953 This will also cause missing files to be marked as deleted.
957 for episode
in self
.get_episodes(gpodder
.STATE_DOWNLOADED
):
958 if episode
.was_downloaded():
959 filename
= episode
.local_filename(create
=False)
961 # No filename has been determined for this episode
964 if not os
.path
.exists(filename
):
965 # File has been deleted by the user - simulate a
966 # delete event (also marks the episode as deleted)
967 logger
.debug('Episode deleted: %s', filename
)
968 episode
.delete_from_disk()
971 known_files
.add(filename
)
973 existing_files
= set(filename
for filename
in
974 glob
.glob(os
.path
.join(self
.save_dir
, '*'))
975 if not filename
.endswith('.partial'))
977 ignore_files
= ['folder' + ext
for ext
in
978 coverart
.CoverDownloader
.EXTENSIONS
]
980 external_files
= existing_files
.difference(list(known_files
) +
981 [os
.path
.join(self
.save_dir
, ignore_file
)
982 for ignore_file
in ignore_files
])
983 if not external_files
:
986 all_episodes
= self
.get_all_episodes()
988 for filename
in external_files
:
991 basename
= os
.path
.basename(filename
)
992 existing
= [e
for e
in all_episodes
if e
.download_filename
== basename
]
994 existing
= existing
[0]
995 logger
.info('Importing external download: %s', filename
)
996 existing
.on_downloaded(filename
)
999 for episode
in all_episodes
:
1000 wanted_filename
= episode
.local_filename(create
=True,
1001 return_wanted_filename
=True)
1002 if basename
== wanted_filename
:
1003 logger
.info('Importing external download: %s', filename
)
1004 episode
.download_filename
= basename
1005 episode
.on_downloaded(filename
)
1009 wanted_base
, wanted_ext
= os
.path
.splitext(wanted_filename
)
1010 target_base
, target_ext
= os
.path
.splitext(basename
)
1011 if wanted_base
== target_base
:
1012 # Filenames only differ by the extension
1013 wanted_type
= util
.file_type_by_extension(wanted_ext
)
1014 target_type
= util
.file_type_by_extension(target_ext
)
1016 # If wanted type is None, assume that we don't know
1017 # the right extension before the download (e.g. YouTube)
1018 # if the wanted type is the same as the target type,
1019 # assume that it's the correct file
1020 if wanted_type
is None or wanted_type
== target_type
:
1021 logger
.info('Importing external download: %s', filename
)
1022 episode
.download_filename
= basename
1023 episode
.on_downloaded(filename
)
1027 if not found
and not util
.is_system_file(filename
):
1028 logger
.warn('Unknown external file: %s', filename
)
def sort_key(cls, podcast):
    """Return a normalized key for ordering *podcast* objects by title."""
    # NOTE(review): declared as a @classmethod upstream; the decorator
    # line is not part of this chunk.
    # Lower-case the title, drop a leading "the ", then apply the
    # class-wide Unicode translation table.
    normalized = util.convert_bytes(podcast.title.lower())
    without_article = re.sub(r'^the ', '', normalized)
    return without_article.translate(cls.UNICODE_TRANSLATE)
def load(cls, model, url, create=True, authentication_tokens=None, max_episodes=0):
    """Return the podcast channel for *url*, subscribing if needed.

    If *model* already contains a podcast with this URL, that podcast is
    returned unchanged.  Otherwise, when *create* is true, a new channel
    is created, saved (so it gets a database ID), updated once from the
    feed, and returned.

    :param model: the Model that owns the podcast list
    :param str url: feed URL
    :param bool create: create and subscribe when not already present
    :param authentication_tokens: optional (username, password) tuple
    :param int max_episodes: cap on episodes fetched (0 = unlimited)
    :raises Exception: whatever the initial feed update raises; the
        partially-created podcast is removed again in that case.
    """
    # NOTE(review): upstream declares this as @classmethod; the decorator
    # line is missing from this chunk.  Several statements below were
    # restored (the `existing` early return, `tmp = cls(...)`, the `try:`
    # matching the visible `except`, and cleanup in the error path) --
    # the visible text used `tmp` without creating it and had an
    # `except` with no `try`.
    existing = [p for p in model.get_podcasts() if p.url == url]
    if existing:
        return existing[0]

    if create:
        tmp = cls(model, url)
        if authentication_tokens is not None:
            tmp.auth_username = authentication_tokens[0]
            tmp.auth_password = authentication_tokens[1]

        # Save podcast, so it gets an ID assigned before
        # updating the feed and adding saving episodes
        tmp.save()

        try:
            tmp.update(max_episodes)
        except Exception:
            logger.debug('Fetch failed. Removing buggy feed.')
            tmp.remove_downloaded()
            tmp.delete()
            raise

        # Determine the section in which this podcast should appear
        tmp.section = tmp._get_content_type()

        # Determine a new download folder now that we have the title
        tmp.get_save_dir(force_new=True)

        # Mark episodes as downloaded if files already exist (bug 902)
        tmp.check_download_folder()

        # Determine common prefix of episode titles
        tmp._determine_common_prefix()

        tmp.save()

        gpodder.user_extensions.on_podcast_subscribe(tmp)

        return tmp
def episode_factory(self, d):
    """Build a PodcastEpisode bound to this channel.

    This function takes a dictionary containing key-value pairs for
    episodes and returns a new PodcastEpisode object that is connected
    to this channel.

    Returns: A new PodcastEpisode object
    """
    make_episode = self.EpisodeClass.create_from_dict
    return make_episode(d, self)
1089 def _consume_updated_title(self
, new_title
):
1090 # Replace multi-space and newlines with single space (Maemo bug 11173)
1091 new_title
= re
.sub(r
'\s+', ' ', new_title
).strip()
1093 # Only update the podcast-supplied title when we
1094 # don't yet have a title, or if the title is the
1095 # feed URL (e.g. we didn't find a title before).
1096 if not self
.title
or self
.title
== self
.url
:
1097 self
.title
= new_title
1099 # Start YouTube- and Vimeo-specific title FIX
1100 YOUTUBE_PREFIX
= 'Uploads by '
1101 VIMEO_PREFIX
= 'Vimeo / '
1102 if self
.title
.startswith(YOUTUBE_PREFIX
):
1103 self
.title
= self
.title
[len(YOUTUBE_PREFIX
):] + ' on YouTube'
1104 elif self
.title
.startswith(VIMEO_PREFIX
):
1105 self
.title
= self
.title
[len(VIMEO_PREFIX
):] + ' on Vimeo'
1106 # End YouTube- and Vimeo-specific title FIX
1108 def _consume_metadata(self
, title
, link
, description
, cover_url
,
1110 self
._consume
_updated
_title
(title
)
1112 self
.description
= description
1113 self
.cover_url
= cover_url
1114 self
.payment_url
= payment_url
def _consume_updated_feed(self, feed, max_episodes=0):
    """Merge a freshly fetched *feed* into this channel.

    Updates channel metadata and HTTP caching headers, collects new
    episodes (following feed pagination), demotes stale episodes from
    "new", and finally purges episodes that disappeared from the feed.

    :param feed: a parsed feed (see the Feed interface above)
    :param int max_episodes: cap on episodes to keep (0 = unlimited)
    """
    self._consume_metadata(feed.get_title() or self.url,
                           feed.get_link() or self.link,
                           feed.get_description() or '',
                           feed.get_cover_url() or None,
                           feed.get_payment_url() or None)

    # Update values for HTTP conditional requests
    self.http_etag = feed.get_http_etag() or self.http_etag
    self.http_last_modified = feed.get_http_last_modified() or self.http_last_modified

    # Load all episodes to update them properly.
    existing = self.get_all_episodes()
    # GUID-based existing episode list
    existing_guids = {e.guid: e for e in existing}

    # Get most recent published of all episodes
    last_published = self.db.get_last_published(self) or 0
    # fix for #516 an episode was marked published one month in the future (typo in month number)
    # causing every new episode to be marked old
    tomorrow = datetime.datetime.now().timestamp() + self.SECONDS_PER_DAY
    if last_published > tomorrow:
        logger.debug('Episode published in the future for podcast %s', self.title)
        last_published = tomorrow

    # new episodes from feed
    new_episodes, seen_guids = feed.get_new_episodes(self, existing_guids)

    # Restored: `next_feed` was used in the loop below but never
    # initialized in the visible text (NameError as written).
    next_feed = feed
    next_max_episodes = max_episodes - len(seen_guids)
    # want to paginate if:
    # - we raised the max episode count so we want more old episodes now
    # FIXME: could also be that feed has less episodes than max_episodes and we're paginating for nothing
    # - all episodes are new so we continue getting them until max_episodes is reached
    could_have_more = max_episodes > len(existing) or len(new_episodes) == len(seen_guids)
    while next_feed and could_have_more:
        if max_episodes > 0 and next_max_episodes <= 0:
            logger.debug("stopping pagination: seen enough episodes (%i)", max_episodes)
            break  # restored: without it this branch looped forever
        # brand new: try to load another page!
        next_result = next_feed.get_next_page(self, next_max_episodes)
        if next_result and next_result.status == feedcore.UPDATED_FEED:
            next_feed = next_result.feed
            for e in new_episodes:
                existing_guids[e.guid] = e
            next_new_episodes, next_seen_guids = next_feed.get_new_episodes(self, existing_guids)
            logger.debug("next page has %i new episodes, %i seen episodes", len(next_new_episodes), len(next_seen_guids))
            if not next_seen_guids:
                logger.debug("breaking out of get_next_page loop because no episode in this page")
                break
            next_max_episodes -= len(next_seen_guids)
            new_episodes += next_new_episodes
            seen_guids = seen_guids.union(next_seen_guids)
        else:
            # Restored terminator: no further page -> stop paginating.
            next_feed = None

    # mark episodes not new
    real_new_episode_count = 0
    # Search all entries for new episodes
    for episode in new_episodes:
        # Workaround for bug 340: If the episode has been
        # published earlier than one week before the most
        # recent existing episode, do not mark it as new.
        if episode.published < last_published - self.SECONDS_PER_WEEK:
            logger.debug('Episode with old date: %s', episode.title)
            episode.is_new = False
            episode.save()  # NOTE(review): restored persist of the flag -- confirm upstream

        if episode.is_new:
            real_new_episode_count += 1

        # Only allow a certain number of new episodes per update
        if (self.download_strategy == PodcastChannel.STRATEGY_LATEST and
                real_new_episode_count > 1):
            episode.is_new = False
            episode.save()  # NOTE(review): restored persist of the flag -- confirm upstream

    self.children.extend(new_episodes)

    self.remove_unreachable_episodes(existing, seen_guids, max_episodes)
def remove_unreachable_episodes(self, existing, seen_guids, max_episodes):
    """Drop episodes that vanished from the feed and were never downloaded.

    Episodes in *existing* that are not downloaded, not currently being
    downloaded (bug 1534), and whose GUID is absent from *seen_guids*
    are deleted from the database and from self.children.  The database
    is then purged down to *max_episodes* and the children list is
    re-sorted by publication date, newest first.
    """
    if self.id is not None:
        purge_list = [
            ep for ep in existing
            if ep.state != gpodder.STATE_DOWNLOADED
            and ep.guid not in seen_guids
            and not ep.downloading
        ]

        for ep in purge_list:
            logger.debug('Episode removed from feed: %s (%s)',
                         ep.title, ep.guid)
            gpodder.user_extensions.on_episode_removed_from_podcast(ep)
            self.db.delete_episode_by_guid(ep.guid, self.id)

            # Also drop it from the in-memory "children" episodes list
            if self.children is not None:
                self.children.remove(ep)

    # This *might* cause episodes to be skipped if there were more than
    # max_episodes_per_feed items added to the feed between updates.
    # The benefit is that it prevents old episodes from apearing as new
    # in certain situations (see bug #340).
    self.db.purge(max_episodes, self.id)  # TODO: Remove from self.children!

    # Sort episodes by pubdate, descending
    self.children.sort(key=lambda ep: ep.published, reverse=True)
def update(self, max_episodes=0):
    """Fetch this channel's feed and merge the result.

    Handles the fetcher's status codes: UPDATED_FEED merges the parsed
    feed, NEW_LOCATION re-points self.url and retries once, NOT_MODIFIED
    is a no-op.  Extension hooks are notified on failure and success.

    :param int max_episodes: cap on episodes to keep (0 = unlimited)
    :raises Exception: re-raises any fetch/parse error after notifying
        extensions via on_podcast_update_failed.
    """
    # NOTE(review): the `try:` matching the visible `except`, the
    # `url = result.feed` assignment (``url`` was used but never
    # assigned in the visible text), `self.url = url`, the recursion's
    # `return`, and the re-`raise` were restored -- confirm upstream.
    max_episodes = int(max_episodes)
    try:
        result = self.feed_fetcher.fetch_channel(self, max_episodes)

        if result.status == feedcore.UPDATED_FEED:
            self._consume_updated_feed(result.feed, max_episodes)
        elif result.status == feedcore.NEW_LOCATION:
            # FIXME: could return the feed because in autodiscovery it is parsed already
            url = result.feed
            logger.info('New feed location: %s => %s', self.url, url)
            if url in set(x.url for x in self.model.get_podcasts()):
                raise Exception('Already subscribed to ' + url)
            self.url = url
            # With the updated URL, fetch the feed again
            self.update(max_episodes)
            return
        elif result.status == feedcore.NOT_MODIFIED:
            pass
    except Exception as e:
        # "Not really" errors
        # feedcore.AuthenticationRequired
        # feedcore.BadRequest
        # feedcore.InternalServerError
        # feedcore.WifiLogin
        # feedcore.Unsubscribe
        # feedcore.InvalidFeed
        # feedcore.UnknownStatusCode
        gpodder.user_extensions.on_podcast_update_failed(self, e)
        raise

    gpodder.user_extensions.on_podcast_updated(self)

    # Re-determine the common prefix for all episodes
    self._determine_common_prefix()
def delete(self):
    """Remove this podcast from the database and from its model.

    NOTE(review): the original method header is missing from this
    chunk; the name `delete` is inferred from the body and the matching
    Model._remove_podcast() helper -- confirm against upstream.
    """
    self.db.delete_podcast(self)
    self.model._remove_podcast(self)
def save(self):
    """Persist this podcast and register it with the model.

    NOTE(review): the original method header is missing from this
    chunk; the name `save` is inferred from self.db.save_podcast(self).
    The body of the dangling `if self.download_folder is None:` was
    restored as a call to get_save_dir() -- confirm against upstream.
    """
    if self.download_folder is None:
        self.get_save_dir()

    gpodder.user_extensions.on_podcast_save(self)

    self.db.save_podcast(self)
    self.model._append_podcast(self)
def get_statistics(self):
    """Return a 5-tuple of episode statistics from the database.

    Before the podcast has a database ID (i.e. before it was saved),
    all counts are zero.
    """
    # Restored guard: in the visible text two `return` statements
    # followed each other, making the database call unreachable.
    if self.id is None:
        return (0, 0, 0, 0, 0)

    return self.db.get_podcast_statistics(self.id)
# NOTE(review): fragment -- the header of the enclosing method/property
# for these two statements is missing from this chunk (original lines
# 1289-1291).  The visible code lazily computes and caches self.section
# from the episode content types on first use.
1292 if not self
.section
:
1293 self
.section
= self
._get
_content
_type
()
def _get_content_type(self):
    """Classify this podcast's content as Video, Audio or Other.

    YouTube/Vimeo feeds are always Video; otherwise the decision is
    made by counting the MIME content types of the stored episodes.

    NOTE(review): the tail of this method (the counter increments, the
    `else` branch and the final classification) is missing from this
    chunk and was restored from the visible structure -- confirm
    against upstream, including the tie-breaking order.
    """
    if 'youtube.com' in self.url or 'vimeo.com' in self.url:
        return _('Video')

    audio, video, other = 0, 0, 0
    for content_type in self.db.get_content_types(self.id):
        content_type = content_type.lower()
        if content_type.startswith('audio'):
            audio += 1
        elif content_type.startswith('video'):
            video += 1
        else:
            other += 1

    if audio >= video:
        return _('Audio')
    elif video > other:
        return _('Video')

    return _('Other')
def authenticate_url(self, url):
    """Return *url* with this channel's stored HTTP credentials embedded."""
    username = self.auth_username
    password = self.auth_password
    return util.url_add_authentication(url, username, password)
def rename(self, new_title):
    """Rename this podcast and move its download folder accordingly.

    No-op when the stripped *new_title* equals the current title.
    Otherwise a new unique folder name is derived from the title and
    the on-disk folder is renamed (or its files moved, if the target
    already exists).

    NOTE(review): the early `return` for an unchanged title (the
    visible `if` had no body) and the `else:` introducing the
    move-files branch were restored -- confirm against upstream.
    """
    new_title = new_title.strip()
    if self.title == new_title:
        return

    fn_template = util.sanitize_filename(new_title, self.MAX_FOLDERNAME_LENGTH)

    new_folder_name = self.find_unique_folder_name(fn_template)
    if new_folder_name and new_folder_name != self.download_folder:
        new_folder = os.path.join(gpodder.downloads, new_folder_name)
        old_folder = os.path.join(gpodder.downloads, self.download_folder)
        if os.path.exists(old_folder):
            if not os.path.exists(new_folder):
                # Old folder exists, new folder does not -> simply rename
                logger.info('Renaming %s => %s', old_folder, new_folder)
                os.rename(old_folder, new_folder)
            else:
                # Both folders exist -> move files and delete old folder
                logger.info('Moving files from %s to %s', old_folder,
                            new_folder)
                for file in glob.glob(os.path.join(old_folder, '*')):
                    shutil.move(file, new_folder)
                logger.info('Removing %s', old_folder)
                shutil.rmtree(old_folder, ignore_errors=True)
        self.download_folder = new_folder_name

    self.title = new_title
1351 def _determine_common_prefix(self
):
1352 # We need at least 2 episodes for the prefix to be "common" ;)
1353 if len(self
.children
) < 2:
1354 self
._common
_prefix
= ''
1357 prefix
= os
.path
.commonprefix([x
.title
for x
in self
.children
])
1358 # The common prefix must end with a space - otherwise it's not
1359 # on a word boundary, and we might end up chopping off too much
1360 if prefix
and prefix
[-1] != ' ':
1361 prefix
= prefix
[:prefix
.rfind(' ') + 1]
1363 self
._common
_prefix
= prefix
def get_all_episodes(self):
    """Return the full in-memory list of this channel's episodes."""
    return self.children
def get_episodes(self, state):
    """Return all episodes of this channel whose state equals *state*."""
    return [episode for episode in self.get_all_episodes()
            if episode.state == state]
def find_unique_folder_name(self, download_folder):
    """Return a folder name based on *download_folder* not used by
    another podcast (this channel's own current folder counts as free).
    """
    # Remove trailing dots to avoid errors on Windows (bug 600)
    # Also remove leading dots to avoid hidden folders on Linux
    download_folder = download_folder.strip('.' + string.whitespace)

    for folder_name in util.generate_names(download_folder):
        if (not self.db.podcast_download_folder_exists(folder_name) or
                self.download_folder == folder_name):
            # Restored: the visible `if` had no body; returning the
            # first available candidate is the only consumer of it.
            return folder_name
def get_save_dir(self, force_new=False):
    """Return this channel's download directory, creating it if needed.

    When no folder has been assigned yet (or *force_new* is set), a
    unique folder name is derived from the title (falling back to the
    URL) and stored on the channel.

    NOTE(review): the `if not fn_template:` fallback guard (in the
    visible text the URL-based assignment unconditionally clobbered
    the title-based one), the try/os.rmdir cleanup around the logged
    "Old download folder is kept" message, the persist, and the final
    `return save_dir` were restored -- confirm against upstream.
    """
    if self.download_folder is None or force_new:
        fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)

        if not fn_template:
            fn_template = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)

        # Find a unique folder name for this podcast
        download_folder = self.find_unique_folder_name(fn_template)

        # Try removing the download folder if it has been created previously
        if self.download_folder is not None:
            folder = os.path.join(gpodder.downloads, self.download_folder)
            try:
                os.rmdir(folder)
            except OSError:
                logger.info('Old download folder is kept for %s', self.url)

        logger.info('Updating download_folder of %s to %s', self.url,
                    download_folder)
        self.download_folder = download_folder
        self.save()

    save_dir = os.path.join(gpodder.downloads, self.download_folder)

    # Create save_dir if it does not yet exist
    if not util.make_directory(save_dir):
        logger.error('Could not create save_dir: %s', save_dir)

    return save_dir
# Expose get_save_dir() as a read-only attribute: accessing
# `channel.save_dir` computes (and lazily creates) the download folder.
1412 save_dir
= property(fget
=get_save_dir
)
def remove_downloaded(self):
    """Delete this channel's download directory from disk.

    Extensions are notified for every downloaded episode file first;
    then the whole save_dir tree is removed (errors are ignored).
    """
    downloaded = self.get_episodes(gpodder.STATE_DOWNLOADED)
    for ep in downloaded:
        path = ep.local_filename(create=False, check_only=True)
        if path is not None:
            gpodder.user_extensions.on_episode_delete(ep, path)

    shutil.rmtree(self.save_dir, True)
def cover_file(self):
    """Return the path of this channel's cover image file.

    NOTE(review): declared as a @property upstream; the decorator line
    is not part of this chunk.
    """
    return os.path.join(self.save_dir, 'folder')
class Model(object):
    """Container for all subscribed podcasts, backed by the database.

    NOTE(review): several lines of this class were missing from the
    chunk and were restored: `self.db = db` in __init__ (the *db*
    parameter was otherwise unused although `self.db` is read in
    get_podcasts), the body of get_podcast's loop, the classmethod
    decorators, and the tail of load_podcast's argument list -- confirm
    against upstream.
    """

    # The podcast class used for all channels of this model
    PodcastClass = PodcastChannel

    def __init__(self, db):
        self.db = db
        # Lazily-loaded list of PodcastChannel objects (None = not loaded)
        self.children = None

    def _append_podcast(self, podcast):
        # Register a podcast with the model (no-op if already present)
        if podcast not in self.children:
            self.children.append(podcast)

    def _remove_podcast(self, podcast):
        # Unregister a podcast and notify extensions
        self.children.remove(podcast)
        gpodder.user_extensions.on_podcast_delete(podcast)

    def get_podcasts(self):
        """Return all podcasts, loading them from the database on first use."""
        def podcast_factory(dct, db):
            return self.PodcastClass.create_from_dict(dct, self, dct['id'])

        if self.children is None:
            self.children = self.db.load_podcasts(podcast_factory)

            # Check download folders for changes (bug 902)
            for podcast in self.children:
                podcast.check_download_folder()

        return self.children

    def get_podcast(self, url):
        """Return the podcast subscribed under *url*, or None."""
        for p in self.get_podcasts():
            if p.url == url:
                return p
        return None

    def load_podcast(self, url, create=True, authentication_tokens=None,
                     max_episodes=0):
        """Subscribe to (or load) the podcast at *url* via PodcastClass.load."""
        # Validate URL is not already subscribed
        assert all(url != podcast.url for podcast in self.get_podcasts())
        return self.PodcastClass.load(self, url, create,
                                      authentication_tokens,
                                      max_episodes)

    @classmethod
    def podcast_sort_key(cls, podcast):
        return cls.PodcastClass.sort_key(podcast)

    @classmethod
    def episode_sort_key(cls, episode):
        return episode.published

    @classmethod
    def sort_episodes_by_pubdate(cls, episodes, reverse=False):
        """Sort a list of PodcastEpisode objects chronologically

        Returns a iterable, sorted sequence of the episodes
        """
        return sorted(episodes, key=cls.episode_sort_key, reverse=reverse)
def check_root_folder_path():
    """Warn when the gPodder home path risks exceeding Windows path limits.

    On Windows, returns a translated warning string when the longest
    possible download file path would be too long; otherwise returns
    None implicitly.
    """
    # NOTE(review): restored -- `root` was used but never assigned in
    # the visible text; confirm the attribute name upstream.
    root = gpodder.home
    if gpodder.ui.win32:
        longest = len(root) \
            + 1 + PodcastChannel.MAX_FOLDERNAME_LENGTH \
            + 1 + PodcastEpisode.MAX_FILENAME_WITH_EXT_LENGTH
        # NOTE(review): restored guard -- `longest` was computed but
        # never used in the visible text; 260 is the classic Windows
        # MAX_PATH limit, confirm the threshold upstream.
        if longest > 260:
            # Fix: translate the template BEFORE interpolating; the
            # original applied _() to the already-interpolated string,
            # which can never match the translation catalog.
            return _("Warning: path to gPodder home (%(root)s) is very long "
                     "and can result in failure to download files.\n") % {"root": root} \
                + _("You're advised to set it to a shorter path.")