# -*- coding: utf-8 -*-
#
# gPodder - A media aggregator and podcast client
# Copyright (c) 2005-2018 The gPodder Team
# Copyright (c) 2011 Neal H. Walfield
#
# gPodder is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# gPodder is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

# gpodder.model - Core model classes for gPodder (2009-08-13)
# Based on libpodcasts.py (thp, 2005-10-29)

import datetime
import glob
import hashlib
import logging
import os
import re
import shutil
import string
import time

import gpodder
from gpodder import (coverart, escapist_videos, feedcore, schema, util, vimeo,
                     youtube)

_ = gpodder.gettext

logger = logging.getLogger(__name__)


class Feed(object):
    """ abstract class for presenting a parsed feed to PodcastChannel """

    def get_title(self):
        """ :return str: the feed's title """

    def get_link(self):
        """ :return str: link to the feed's website """

    def get_description(self):
        """ :return str: feed's textual description """

    def get_cover_url(self):
        """ :return str: url of the feed's cover image """

    def get_payment_url(self):
        """ :return str: optional -- feed's payment url """

    def get_http_etag(self):
        """ :return str: optional -- last HTTP etag header, for conditional request next time """

    def get_http_last_modified(self):
        """ :return str: optional -- last HTTP Last-Modified header, for conditional request next time """

    def get_new_episodes(self, channel, existing_guids):
        """
        Produce new episodes and update old ones.
        Feed is a class to present results, so the feed shall have already been fetched.
        Existing episodes not in all_seen_guids will be purged from the database.
        :param PodcastChannel channel: the updated channel
        :param dict(str, PodcastEpisode): existing episodes, by guid
        :return (list(PodcastEpisode), set(str)): new_episodes, all_seen_guids
        """

    def get_next_page(self, channel, max_episodes):
        """
        Paginated feed support (RFC 5005).
        If the feed is paged, return the next feed page.
        Returned page will in turn be asked for the next page, until None is returned.
        :return feedcore.Result: the next feed's page,
                 as a fully parsed Feed or None
        """
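

# Note (illustration only, not part of the original module): PodcastParserFeed
# below adapts the dict-based output of the podcastparser library to this
# interface; custom handlers registered further down can supply other
# implementations.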


class PodcastParserFeed(Feed):
    def __init__(self, feed, fetcher, max_episodes=0):
        self.feed = feed
        self.fetcher = fetcher
        self.max_episodes = max_episodes

    def get_title(self):
        return self.feed.get('title')

    def get_link(self):
        return self.feed.get('link')

    def get_description(self):
        return self.feed.get('description')

    def get_cover_url(self):
        return self.feed.get('cover_url')

    def get_payment_url(self):
        return self.feed.get('payment_url')

    def get_http_etag(self):
        return self.feed.get('headers', {}).get('etag')

    def get_http_last_modified(self):
        return self.feed.get('headers', {}).get('last-modified')

    def get_new_episodes(self, channel, existing_guids):
        # Keep track of episode GUIDs currently seen in the feed
        seen_guids = set()

        # list of new episodes
        new_episodes = []

        # We have to sort the entries in descending chronological order,
        # because if the feed lists items in ascending order and has >
        # max_episodes old episodes, new episodes will not be shown.
        # See also: gPodder Bug 1186
        entries = sorted(self.feed.get('episodes', []), key=lambda episode: episode['published'], reverse=True)

        # We can limit the maximum number of entries that gPodder will parse
        if self.max_episodes > 0 and len(entries) > self.max_episodes:
            entries = entries[:self.max_episodes]

        # Search all entries for new episodes
        for entry in entries:
            episode = channel.EpisodeClass.from_podcastparser_entry(entry, channel)
            if episode is None:
                continue

            seen_guids.add(episode.guid)

            # Detect (and update) existing episode based on GUIDs
            existing_episode = existing_guids.get(episode.guid, None)
            if existing_episode:
                existing_episode.update_from(episode)
                existing_episode.save()
                continue

            episode.save()
            new_episodes.append(episode)

        return new_episodes, seen_guids

    def get_next_page(self, channel, max_episodes):
        if 'paged_feed_next' in self.feed:
            url = self.feed['paged_feed_next']
            logger.debug("get_next_page: feed has next %s", url)
            url = channel.authenticate_url(url)
            res = self.fetcher.fetch(url, max_episodes=max_episodes)
            if res.status == feedcore.UPDATED_FEED:
                res.feed = PodcastParserFeed(res.feed, self.fetcher, max_episodes)
            return res
        return None


class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder.
    """
    custom_handlers = []

    def fetch_channel(self, channel, max_episodes):
        for handler in self.custom_handlers:
            custom_feed = handler.fetch_channel(channel, max_episodes)
            if custom_feed is not None:
                return custom_feed

        # If we have a username or password, rebuild the url with them included
        # Note: using a HTTPBasicAuthHandler would be pain because we need to
        # know the realm. It can be done, but I think this method works, too
        url = channel.authenticate_url(channel.url)
        res = self.fetch(url, channel.http_etag, channel.http_last_modified, max_episodes)
        if res.status == feedcore.UPDATED_FEED:
            res.feed = PodcastParserFeed(res.feed, self, max_episodes)
        return res

    def _resolve_url(self, url):
        url = youtube.get_real_channel_url(url)
        url = vimeo.get_real_channel_url(url)
        url = escapist_videos.get_real_channel_url(url)
        return url

    @classmethod
    def register(cls, handler):
        cls.custom_handlers.append(handler)

    @classmethod
    def unregister(cls, handler):
        cls.custom_handlers.remove(handler)


# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register

# The "unregister" method is exposed here for external usage
unregister_custom_handler = gPodderFetcher.unregister
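
# Illustration only (not part of the original code): a custom handler is any
# object whose fetch_channel(channel, max_episodes) returns a fetch result for
# feeds it knows how to handle, or None to fall through to the regular HTTP
# fetcher above. A hypothetical extension could hook in like this:
#
#     class MyFeedHandler(object):
#         def fetch_channel(self, channel, max_episodes):
#             return None  # defer to the default fetcher
#
#     register_custom_handler(MyFeedHandler())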

# database -> podcast -> episode -> download/playback
# podcast.parent == db
# podcast.children == [episode, ...]
# episode.parent == podcast
#
# - normally: episode.children = (None, None)
# - downloading: episode.children = (DownloadTask(), None)
# - playback: episode.children = (None, PlaybackTask())


class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """
    __slots__ = ('id', 'parent', 'children')

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        o = cls(*args)

        # XXX: all(map(lambda k: hasattr(o, k), d))?
        for k, v in d.items():
            setattr(o, k, v)

        return o


class PodcastEpisode(PodcastModelObject):
    """holds data for one object in a channel"""
    # In theory, Linux can have 255 bytes (not characters!) in a filename, but
    # filesystems like eCryptFS store metadata in the filename, making the
    # effective number of characters less than that. eCryptFS recommends
    # 140 chars, we use 120 here (140 - len(extension) - len(".partial")).
    # References: gPodder bug 1898, http://unix.stackexchange.com/a/32834
    MAX_FILENAME_LENGTH = 120  # without extension
    MAX_FILENAME_WITH_EXT_LENGTH = 140 - len(".partial")  # with extension

    __slots__ = schema.EpisodeColumns

    def _deprecated(self):
        raise Exception('Property is deprecated!')

    is_played = property(fget=_deprecated, fset=_deprecated)
    is_locked = property(fget=_deprecated, fset=_deprecated)

    def has_website_link(self):
        return bool(self.link) and (self.link != self.url or
                youtube.is_video_link(self.link))

    @classmethod
    def from_podcastparser_entry(cls, entry, channel):
        episode = cls(channel)
        episode.guid = entry['guid']
        episode.title = entry['title']
        episode.link = entry['link']
        episode.description = entry['description']
        if entry.get('description_html'):
            episode.description_html = entry['description_html']

        episode.total_time = entry['total_time']
        episode.published = entry['published']
        episode.payment_url = entry['payment_url']

        audio_available = any(enclosure['mime_type'].startswith('audio/') for enclosure in entry['enclosures'])
        video_available = any(enclosure['mime_type'].startswith('video/') for enclosure in entry['enclosures'])

        for enclosure in entry['enclosures']:
            episode.mime_type = enclosure['mime_type']

            # Skip images in feeds if audio or video is available (bug 979)
            # This must (and does) also look in Media RSS enclosures (bug 1430)
            if episode.mime_type.startswith('image/') and (audio_available or video_available):
                continue

            # If we have audio or video available later on, skip
            # 'application/octet-stream' data types (fixes Linux Outlaws)
            if episode.mime_type == 'application/octet-stream' and (audio_available or video_available):
                continue

            episode.url = util.normalize_feed_url(enclosure['url'])
            if not episode.url:
                continue

            episode.file_size = enclosure['file_size']
            return episode

        # Brute-force detection of the episode link
        episode.url = util.normalize_feed_url(entry['link'])
        if not episode.url:
            return None

        if any(mod.is_video_link(episode.url) for mod in (youtube, vimeo, escapist_videos)):
            return episode

        # Check if we can resolve this link to a audio/video file
        filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

        return None

    def __init__(self, channel):
        self.parent = channel
        self.podcast_id = self.parent.id
        self.children = (None, None)

        self.id = None
        self.url = ''
        self.title = ''
        self.file_size = 0
        self.mime_type = 'application/octet-stream'
        self.guid = ''
        self.description = ''
        self.description_html = ''
        self.link = ''
        self.published = 0
        self.download_filename = None
        self.payment_url = None

        self.state = gpodder.STATE_NORMAL
        self.is_new = True
        self.archive = channel.auto_archive_episodes

        self.total_time = 0
        self.current_position = 0
        self.current_position_updated = 0

        # Timestamp of last playback time
        self.last_playback = 0

    @property
    def channel(self):
        return self.parent

    @property
    def db(self):
        return self.parent.parent.db

    @property
    def trimmed_title(self):
        """Return the title with the common prefix trimmed"""
        # Minimum amount of leftover characters after trimming. This
        # avoids things like "Common prefix 123" to become just "123".
        # If there are LEFTOVER_MIN or less characters after trimming,
        # the original title will be returned without trimming.
        LEFTOVER_MIN = 5

        # "Podcast Name - Title" and "Podcast Name: Title" -> "Title"
        for postfix in (' - ', ': '):
            prefix = self.parent.title + postfix
            if (self.title.startswith(prefix) and
                    len(self.title) - len(prefix) > LEFTOVER_MIN):
                return self.title[len(prefix):]

        regex_patterns = [
            # "Podcast Name <number>: ..." -> "<number>: ..."
            r'^%s (\d+: .*)' % re.escape(self.parent.title),
            # "Episode <number>: ..." -> "<number>: ..."
            r'Episode (\d+: .*)',
        ]

        for pattern in regex_patterns:
            if re.match(pattern, self.title):
                title = re.sub(pattern, r'\1', self.title)
                if len(title) > LEFTOVER_MIN:
                    return title

        # "#001: Title" -> "001: Title"
        if (
                not self.parent._common_prefix and
                re.match(r'^#\d+: ', self.title) and
                len(self.title) - 1 > LEFTOVER_MIN):
            return self.title[1:]

        if (self.parent._common_prefix is not None and
                self.title.startswith(self.parent._common_prefix) and
                len(self.title) - len(self.parent._common_prefix) > LEFTOVER_MIN):
            return self.title[len(self.parent._common_prefix):]

        return self.title
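
    # Example of the trimming above (illustration only): with a parent podcast
    # titled 'Linux Weekly', an episode 'Linux Weekly - Interviews' is shown as
    # 'Interviews', while 'Linux Weekly - #3' keeps its full title because
    # fewer than LEFTOVER_MIN characters would remain after trimming.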

    def _set_download_task(self, download_task):
        self.children = (download_task, self.children[1])

    def _get_download_task(self):
        return self.children[0]

    download_task = property(_get_download_task, _set_download_task)

    @property
    def downloading(self):
        task = self.download_task
        if task is None:
            return False

        return task.status in (task.DOWNLOADING, task.QUEUED, task.PAUSED)

    def check_is_new(self):
        return (self.state == gpodder.STATE_NORMAL and self.is_new and
                not self.downloading)

    def save(self):
        gpodder.user_extensions.on_episode_save(self)
        self.db.save_episode(self)

    def on_downloaded(self, filename):
        self.state = gpodder.STATE_DOWNLOADED
        self.is_new = False
        self.file_size = os.path.getsize(filename)
        self.save()

    def set_state(self, state):
        self.state = state
        self.save()

    def playback_mark(self):
        self.is_new = False
        self.last_playback = int(time.time())
        gpodder.user_extensions.on_episode_playback(self)
        self.save()

    def mark(self, state=None, is_played=None, is_locked=None):
        if state is not None:
            self.state = state
        if is_played is not None:
            self.is_new = not is_played

            # "Mark as new" must "undelete" the episode
            if self.is_new and self.state == gpodder.STATE_DELETED:
                self.state = gpodder.STATE_NORMAL
        if is_locked is not None:
            self.archive = is_locked
        self.save()

    def age_in_days(self):
        return util.file_age_in_days(self.local_filename(create=False,
                check_only=True))

    age_int_prop = property(fget=age_in_days)

    def get_age_string(self):
        return util.file_age_to_string(self.age_in_days())

    age_prop = property(fget=get_age_string)

    def one_line_description(self):
        MAX_LINE_LENGTH = 120
        desc = util.remove_html_tags(self.description or '')
        desc = re.sub(r'\s+', ' ', desc).strip()
        if not desc:
            return _('No description available')

        # Decode the description to avoid gPodder bug 1277
        desc = util.convert_bytes(desc).strip()

        if len(desc) > MAX_LINE_LENGTH:
            return desc[:MAX_LINE_LENGTH] + '...'
        return desc

    def delete_from_disk(self):
        filename = self.local_filename(create=False, check_only=True)
        if filename is not None:
            gpodder.user_extensions.on_episode_delete(self, filename)
            util.delete_file(filename)

        self.set_state(gpodder.STATE_DELETED)

    def get_playback_url(self, fmt_ids=None, vimeo_fmt=None, allow_partial=False):
        """Local (or remote) playback/streaming filename/URL

        Returns either the local filename or a streaming URL that
        can be used to playback this episode.

        Also returns the filename of a partially downloaded file
        in case partial (preview) playback is desired.
        """
        url = self.local_filename(create=False)

        if (allow_partial and url is not None and
                os.path.exists(url + '.partial')):
            return url + '.partial'

        if url is None or not os.path.exists(url):
            url = self.url
            url = youtube.get_real_download_url(url, fmt_ids)
            url = vimeo.get_real_download_url(url, vimeo_fmt)
            url = escapist_videos.get_real_download_url(url)

        return url

    def find_unique_file_name(self, filename, extension):
        # Remove leading and trailing whitespace + dots (to avoid hidden files)
        filename = filename.strip('.' + string.whitespace) + extension

        for name in util.generate_names(filename):
            if (not self.db.episode_filename_exists(self.podcast_id, name) or
                    self.download_filename == name):
                return name

    def local_filename(self, create, force_update=False, check_only=False,
            template=None, return_wanted_filename=False):
        """Get (and possibly generate) the local saving filename

        Pass create=True if you want this function to generate a
        new filename if none exists. You only want to do this when
        planning to create/download the file after calling this function.

        Normally, you should pass create=False. This will only
        create a filename when the file already exists from a previous
        version of gPodder (where we used md5 filenames). If the file
        does not exist (and the filename also does not exist), this
        function will return None.

        If you pass force_update=True to this function, it will try to
        find a new (better) filename and move the current file if this
        is the case. This is useful if (during the download) you get
        more information about the file, e.g. the mimetype and you want
        to include this information in the file name generation process.

        If check_only=True is passed to this function, it will never try
        to rename the file, even if would be a good idea. Use this if you
        only want to check if a file exists.

        If "template" is specified, it should be a filename that is to
        be used as a template for generating the "real" filename.

        The generated filename is stored in the database for future access.

        If return_wanted_filename is True, the filename will not be written to
        the database, but simply returned by this function (for use by the
        "import external downloads" feature).
        """
        if self.download_filename is None and (check_only or not create):
            return None

        ext = self.extension(may_call_local_filename=False)

        if not check_only and (force_update or not self.download_filename):
            # Avoid and catch gPodder bug 1440 and similar situations
            if template == '':
                logger.warn('Empty template. Report this podcast URL %s',
                        self.channel.url)
                template = None

            # Try to find a new filename for the current file
            if template is not None:
                # If template is specified, trust the template's extension
                episode_filename, ext = os.path.splitext(template)
            else:
                episode_filename, _ = util.filename_from_url(self.url)

            if 'redirect' in episode_filename and template is None:
                # This looks like a redirection URL - force URL resolving!
                logger.warn('Looks like a redirection to me: %s', self.url)
                url = util.get_real_url(self.channel.authenticate_url(self.url))
                logger.info('Redirection resolved to: %s', url)
                episode_filename, _ = util.filename_from_url(url)

            # Use title for YouTube, Vimeo and Soundcloud downloads
            if (youtube.is_video_link(self.url) or
                    vimeo.is_video_link(self.url) or
                    escapist_videos.is_video_link(self.url) or
                    episode_filename == 'stream'):
                episode_filename = self.title

            # If the basename is empty, use the md5 hexdigest of the URL
            if not episode_filename or episode_filename.startswith('redirect.'):
                logger.error('Report this feed: Podcast %s, episode %s',
                        self.channel.url, self.url)
                episode_filename = hashlib.md5(self.url.encode('utf-8')).hexdigest()

            # Also sanitize ext (see #591 where ext=.mp3?dest-id=754182)
            fn_template, ext = util.sanitize_filename_ext(
                    episode_filename,
                    ext,
                    self.MAX_FILENAME_LENGTH,
                    self.MAX_FILENAME_WITH_EXT_LENGTH)
            # Find a unique filename for this episode
            wanted_filename = self.find_unique_file_name(fn_template, ext)

            if return_wanted_filename:
                # return the calculated filename without updating the database
                return wanted_filename

            # The old file exists, but we have decided to want a different filename
            if self.download_filename and wanted_filename != self.download_filename:
                # there might be an old download folder crawling around - move it!
                new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
                old_file_name = os.path.join(self.channel.save_dir, self.download_filename)
                if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                    logger.info('Renaming %s => %s', old_file_name, new_file_name)
                    os.rename(old_file_name, new_file_name)
                elif force_update and not os.path.exists(old_file_name):
                    # When we call force_update, the file might not yet exist when we
                    # call it from the downloading code before saving the file
                    logger.info('Choosing new filename: %s', new_file_name)
                else:
                    logger.warn('%s exists or %s does not', new_file_name, old_file_name)
                logger.info('Updating filename of %s to "%s".', self.url, wanted_filename)
            elif self.download_filename is None:
                logger.info('Setting download filename: %s', wanted_filename)
            self.download_filename = wanted_filename
            self.save()

        if return_wanted_filename:
            # return the filename, not full path
            return self.download_filename
        return os.path.join(self.channel.save_dir, self.download_filename)

    def extension(self, may_call_local_filename=True):
        filename, ext = util.filename_from_url(self.url)
        if may_call_local_filename:
            filename = self.local_filename(create=False)
            if filename is not None:
                filename, ext = os.path.splitext(filename)
        # if we can't detect the extension from the url fallback on the mimetype
        if ext == '' or util.file_type_by_extension(ext) is None:
            ext = util.extension_from_mimetype(self.mime_type)
        return ext

    def file_exists(self):
        filename = self.local_filename(create=False, check_only=True)
        if filename is None:
            return False
        return os.path.exists(filename)

    def was_downloaded(self, and_exists=False):
        if self.state != gpodder.STATE_DOWNLOADED:
            return False
        if and_exists and not self.file_exists():
            return False
        return True

    def sync_filename(self, use_custom=False, custom_format=None):
        if use_custom:
            return util.object_string_formatter(custom_format,
                    episode=self, podcast=self.channel)
        return self.title

    def file_type(self):
        # Assume all YouTube/Vimeo links are video files
        if youtube.is_video_link(self.url) or vimeo.is_video_link(self.url) or escapist_videos.is_video_link(self.url):
            return 'video'

        return util.file_type_by_extension(self.extension())

    @property
    def basename(self):
        return os.path.splitext(os.path.basename(self.url))[0]

    @property
    def pubtime(self):
        """
        Returns published time as HHMM (or 0000 if not available)
        """
        try:
            return datetime.datetime.fromtimestamp(self.published).strftime('%H%M')
        except Exception:
            logger.warn('Cannot format pubtime: %s', self.title, exc_info=True)
            return '0000'

    def playlist_title(self):
        """Return a title for this episode in a playlist

        The title will be composed of the podcast name, the
        episode name and the publication date. The return
        value is the canonical representation of this episode
        in playlists (for example, M3U playlists).
        """
        return '%s - %s (%s)' % (self.channel.title,
                self.trimmed_title,
                self.cute_pubdate())

    def cute_pubdate(self):
        result = util.format_date(self.published)
        if result is None:
            return '(%s)' % _('unknown')

        return result

    pubdate_prop = property(fget=cute_pubdate)

    def published_datetime(self):
        return datetime.datetime.fromtimestamp(self.published)

    def sortdate(self):
        return self.published_datetime().strftime('%Y-%m-%d')

    def pubdate_day(self):
        return self.published_datetime().strftime('%d')

    def pubdate_month(self):
        return self.published_datetime().strftime('%m')

    def pubdate_year(self):
        return self.published_datetime().strftime('%y')

    def is_finished(self):
        """Return True if this episode is considered "finished playing"

        An episode is considered "finished" when there is a
        current position mark on the track, and when the
        current position is greater than 99 percent of the
        total time or inside the last 10 seconds of a track.
        """
        return (self.current_position > 0 and self.total_time > 0 and
                (self.current_position + 10 >= self.total_time or
                 self.current_position >= self.total_time * .99))
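
    # Worked example (illustration only): with total_time == 600 seconds the
    # episode counts as finished once current_position >= 590 (inside the last
    # 10 seconds) or current_position >= 594 (99 percent), i.e. at 590 here.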

    def get_play_info_string(self, duration_only=False):
        duration = util.format_time(self.total_time)
        if duration_only and self.total_time > 0:
            return duration
        elif self.is_finished():
            return '%s (%s)' % (_('Finished'), duration)
        elif self.current_position > 0 and \
                self.current_position != self.total_time:
            position = util.format_time(self.current_position)
            return '%s / %s' % (position, duration)
        elif self.total_time > 0:
            return duration

    def update_from(self, episode):
        for k in ('title', 'url', 'description', 'description_html', 'link', 'published', 'guid', 'payment_url'):
            setattr(self, k, getattr(episode, k))
        # Don't overwrite file size on downloaded episodes
        # See #648 refreshing a youtube podcast clears downloaded file size
        if self.state != gpodder.STATE_DOWNLOADED:
            setattr(self, 'file_size', getattr(episode, 'file_size'))


class PodcastChannel(PodcastModelObject):
    __slots__ = schema.PodcastColumns + ('_common_prefix',)

    UNICODE_TRANSLATE = {ord('ö'): 'o', ord('ä'): 'a', ord('ü'): 'u'}

    # Enumerations for download strategy
    STRATEGY_DEFAULT, STRATEGY_LATEST = list(range(2))

    # Description and ordering of strategies
    STRATEGIES = [
        (STRATEGY_DEFAULT, _('Default')),
        (STRATEGY_LATEST, _('Only keep latest')),
    ]

    MAX_FOLDERNAME_LENGTH = 60
    SECONDS_PER_DAY = 24 * 60 * 60
    SECONDS_PER_WEEK = 7 * 24 * 60 * 60
    EpisodeClass = PodcastEpisode

    feed_fetcher = gPodderFetcher()

    def __init__(self, model, id=None):
        self.parent = model
        self.children = None

        self.id = id
        self.url = None
        self.title = None
        self.link = ''
        self.description = ''
        self.cover_url = None
        self.payment_url = None

        self.auth_username = ''
        self.auth_password = ''

        self.http_last_modified = None
        self.http_etag = None

        self.auto_archive_episodes = False
        self.download_folder = None
        self.pause_subscription = False
        self.sync_to_mp3_player = True
        self.cover_thumb = None

        self.section = _('Other')
        self._common_prefix = None
        self.download_strategy = PodcastChannel.STRATEGY_DEFAULT

        if self.id:
            self.children = self.db.load_episodes(self, self.episode_factory)
            self._determine_common_prefix()

    @property
    def model(self):
        return self.parent

    @property
    def db(self):
        return self.parent.db

    def get_download_strategies(self):
        for value, caption in PodcastChannel.STRATEGIES:
            yield self.download_strategy == value, value, caption

    def set_download_strategy(self, download_strategy):
        if download_strategy == self.download_strategy:
            return

        caption = dict(self.STRATEGIES).get(download_strategy)
        if caption is not None:
            logger.debug('Strategy for %s changed to %s', self.title, caption)
            self.download_strategy = download_strategy
        else:
            logger.warn('Cannot set strategy to %d', download_strategy)

    def rewrite_url(self, new_url):
        new_url = util.normalize_feed_url(new_url)
        if new_url is None:
            return None

        self.url = new_url
        self.http_etag = None
        self.http_last_modified = None
        self.save()
        return new_url

    def check_download_folder(self):
        """Check the download folder for externally-downloaded files

        This will try to assign downloaded files with episodes in the
        database.

        This will also cause missing files to be marked as deleted.
        """
        known_files = set()

        for episode in self.get_episodes(gpodder.STATE_DOWNLOADED):
            if episode.was_downloaded():
                filename = episode.local_filename(create=False)
                if filename is None:
                    # No filename has been determined for this episode
                    continue

                if not os.path.exists(filename):
                    # File has been deleted by the user - simulate a
                    # delete event (also marks the episode as deleted)
                    logger.debug('Episode deleted: %s', filename)
                    episode.delete_from_disk()
                    continue

                known_files.add(filename)

        existing_files = set(filename for filename in
                glob.glob(os.path.join(self.save_dir, '*'))
                if not filename.endswith('.partial'))

        ignore_files = ['folder' + ext for ext in
                coverart.CoverDownloader.EXTENSIONS]

        external_files = existing_files.difference(list(known_files) +
                [os.path.join(self.save_dir, ignore_file)
                 for ignore_file in ignore_files])
        if not external_files:
            return

        all_episodes = self.get_all_episodes()

        for filename in external_files:
            found = False

            basename = os.path.basename(filename)
            existing = [e for e in all_episodes if e.download_filename == basename]
            if existing:
                existing = existing[0]
                logger.info('Importing external download: %s', filename)
                existing.on_downloaded(filename)
                continue

            for episode in all_episodes:
                wanted_filename = episode.local_filename(create=True,
                        return_wanted_filename=True)
                if basename == wanted_filename:
                    logger.info('Importing external download: %s', filename)
                    episode.download_filename = basename
                    episode.on_downloaded(filename)
                    found = True
                    break

                wanted_base, wanted_ext = os.path.splitext(wanted_filename)
                target_base, target_ext = os.path.splitext(basename)
                if wanted_base == target_base:
                    # Filenames only differ by the extension
                    wanted_type = util.file_type_by_extension(wanted_ext)
                    target_type = util.file_type_by_extension(target_ext)

                    # If wanted type is None, assume that we don't know
                    # the right extension before the download (e.g. YouTube)
                    # if the wanted type is the same as the target type,
                    # assume that it's the correct file
                    if wanted_type is None or wanted_type == target_type:
                        logger.info('Importing external download: %s', filename)
                        episode.download_filename = basename
                        episode.on_downloaded(filename)
                        found = True
                        break

            if not found and not util.is_system_file(filename):
                logger.warn('Unknown external file: %s', filename)

    @classmethod
    def sort_key(cls, podcast):
        key = util.convert_bytes(podcast.title.lower())
        return re.sub('^the ', '', key).translate(cls.UNICODE_TRANSLATE)
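
    # For example (illustration only), a podcast titled 'The Häcker Show' is
    # sorted under 'hacker show': the leading 'the ' is dropped and umlauts
    # are transliterated via UNICODE_TRANSLATE.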

    @classmethod
    def load(cls, model, url, create=True, authentication_tokens=None, max_episodes=0):
        existing = [p for p in model.get_podcasts() if p.url == url]
        if existing:
            return existing[0]

        if create:
            tmp = cls(model)
            tmp.url = url
            if authentication_tokens is not None:
                tmp.auth_username = authentication_tokens[0]
                tmp.auth_password = authentication_tokens[1]

            # Save podcast, so it gets an ID assigned before
            # updating the feed and adding saving episodes
            tmp.save()

            try:
                tmp.update(max_episodes)
            except Exception as e:
                logger.debug('Fetch failed. Removing buggy feed.')
                tmp.remove_downloaded()
                tmp.delete()
                raise

            # Determine the section in which this podcast should appear
            tmp.section = tmp._get_content_type()

            # Determine a new download folder now that we have the title
            tmp.get_save_dir(force_new=True)

            # Mark episodes as downloaded if files already exist (bug 902)
            tmp.check_download_folder()

            # Determine common prefix of episode titles
            tmp._determine_common_prefix()

            tmp.save()

            gpodder.user_extensions.on_podcast_subscribe(tmp)

            return tmp

    def episode_factory(self, d):
        """
        This function takes a dictionary containing key-value pairs for
        episodes and returns a new PodcastEpisode object that is connected
        to this object.

        Returns: A new PodcastEpisode object
        """
        return self.EpisodeClass.create_from_dict(d, self)

    def _consume_updated_title(self, new_title):
        # Replace multi-space and newlines with single space (Maemo bug 11173)
        new_title = re.sub(r'\s+', ' ', new_title).strip()

        # Only update the podcast-supplied title when we
        # don't yet have a title, or if the title is the
        # feed URL (e.g. we didn't find a title before).
        if not self.title or self.title == self.url:
            self.title = new_title

            # Start YouTube- and Vimeo-specific title FIX
            YOUTUBE_PREFIX = 'Uploads by '
            VIMEO_PREFIX = 'Vimeo / '
            if self.title.startswith(YOUTUBE_PREFIX):
                self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
            elif self.title.startswith(VIMEO_PREFIX):
                self.title = self.title[len(VIMEO_PREFIX):] + ' on Vimeo'
            # End YouTube- and Vimeo-specific title FIX

    def _consume_metadata(self, title, link, description, cover_url,
            payment_url):
        self._consume_updated_title(title)
        self.link = link
        self.description = description
        self.cover_url = cover_url
        self.payment_url = payment_url
        self.save()

    def _consume_updated_feed(self, feed, max_episodes=0):
        self._consume_metadata(feed.get_title() or self.url,
                feed.get_link() or self.link,
                feed.get_description() or '',
                feed.get_cover_url() or None,
                feed.get_payment_url() or None)

        # Update values for HTTP conditional requests
        self.http_etag = feed.get_http_etag() or self.http_etag
        self.http_last_modified = feed.get_http_last_modified() or self.http_last_modified

        # Load all episodes to update them properly.
        existing = self.get_all_episodes()
        # GUID-based existing episode list
        existing_guids = {e.guid: e for e in existing}

        # Get most recent published of all episodes
        last_published = self.db.get_last_published(self) or 0
        # fix for #516 an episode was marked published one month in the future (typo in month number)
        # causing every new episode to be marked old
        tomorrow = datetime.datetime.now().timestamp() + self.SECONDS_PER_DAY
        if last_published > tomorrow:
            logger.debug('Episode published in the future for podcast %s', self.title)
            last_published = tomorrow

        # new episodes from feed
        new_episodes, seen_guids = feed.get_new_episodes(self, existing_guids)

        next_feed = feed
        next_max_episodes = max_episodes - len(seen_guids)
        # want to paginate if:
        # - we raised the max episode count so we want more old episodes now
        # FIXME: could also be that feed has less episodes than max_episodes and we're paginating for nothing
        # - all episodes are new so we continue getting them until max_episodes is reached
        could_have_more = max_episodes > len(existing) or len(new_episodes) == len(seen_guids)
        while next_feed and could_have_more:
            if max_episodes > 0 and next_max_episodes <= 0:
                logger.debug("stopping pagination: seen enough episodes (%i)", max_episodes)
                break
            # brand new: try to load another page!
            next_result = next_feed.get_next_page(self, next_max_episodes)
            if next_result and next_result.status == feedcore.UPDATED_FEED:
                next_feed = next_result.feed
                for e in new_episodes:
                    existing_guids[e.guid] = e
                next_new_episodes, next_seen_guids = next_feed.get_new_episodes(self, existing_guids)
                logger.debug("next page has %i new episodes", len(next_new_episodes))
                next_max_episodes -= len(next_seen_guids)
                new_episodes += next_new_episodes
                seen_guids = seen_guids.union(next_seen_guids)
            else:
                next_feed = None

        # mark episodes not new
        real_new_episode_count = 0
        # Search all entries for new episodes
        for episode in new_episodes:
            # Workaround for bug 340: If the episode has been
            # published earlier than one week before the most
            # recent existing episode, do not mark it as new.
            if episode.published < last_published - self.SECONDS_PER_WEEK:
                logger.debug('Episode with old date: %s', episode.title)
                episode.is_new = False
                episode.save()

            if episode.is_new:
                real_new_episode_count += 1

            # Only allow a certain number of new episodes per update
            if (self.download_strategy == PodcastChannel.STRATEGY_LATEST and
                    real_new_episode_count > 1):
                episode.is_new = False
                episode.save()

        self.children.extend(new_episodes)

        self.remove_unreachable_episodes(existing, seen_guids, max_episodes)

    def remove_unreachable_episodes(self, existing, seen_guids, max_episodes):
        # Remove "unreachable" episodes - episodes that have not been
        # downloaded and that the feed does not list as downloadable anymore
        # Keep episodes that are currently being downloaded, though (bug 1534)
        if self.id is not None:
            episodes_to_purge = [e for e in existing if
                    e.state != gpodder.STATE_DOWNLOADED and
                    e.guid not in seen_guids and not e.downloading]

            for episode in episodes_to_purge:
                logger.debug('Episode removed from feed: %s (%s)',
                        episode.title, episode.guid)
                gpodder.user_extensions.on_episode_removed_from_podcast(episode)
                self.db.delete_episode_by_guid(episode.guid, self.id)

                # Remove the episode from the "children" episodes list
                if self.children is not None:
                    self.children.remove(episode)

        # This *might* cause episodes to be skipped if there were more than
        # max_episodes_per_feed items added to the feed between updates.
        # The benefit is that it prevents old episodes from appearing as new
        # in certain situations (see bug #340).
        self.db.purge(max_episodes, self.id)  # TODO: Remove from self.children!

        # Sort episodes by pubdate, descending
        self.children.sort(key=lambda e: e.published, reverse=True)

    def update(self, max_episodes=0):
        max_episodes = int(max_episodes)

        try:
            result = self.feed_fetcher.fetch_channel(self, max_episodes)

            if result.status == feedcore.UPDATED_FEED:
                self._consume_updated_feed(result.feed, max_episodes)
            elif result.status == feedcore.NEW_LOCATION:
                url = result.feed
                logger.info('New feed location: %s => %s', self.url, url)
                if url in set(x.url for x in self.model.get_podcasts()):
                    raise Exception('Already subscribed to ' + url)
                self.url = url
                # With the updated URL, fetch the feed again
                self.update(max_episodes)
                return
            elif result.status == feedcore.NOT_MODIFIED:
                pass

            self.save()
        except Exception as e:
            # "Not really" errors
            # feedcore.AuthenticationRequired
            # feedcore.BadRequest
            # feedcore.InternalServerError
            # feedcore.WifiLogin
            # feedcore.Unsubscribe
            # feedcore.InvalidFeed
            # feedcore.UnknownStatusCode
            gpodder.user_extensions.on_podcast_update_failed(self, e)
            raise

        gpodder.user_extensions.on_podcast_updated(self)

        # Re-determine the common prefix for all episodes
        self._determine_common_prefix()

    def delete(self):
        self.db.delete_podcast(self)
        self.model._remove_podcast(self)

    def save(self):
        if self.download_folder is None:
            self.get_save_dir()

        gpodder.user_extensions.on_podcast_save(self)

        self.db.save_podcast(self)
        self.model._append_podcast(self)

    def get_statistics(self):
        if self.id is None:
            return (0, 0, 0, 0, 0)
        else:
            return self.db.get_podcast_statistics(self.id)

    @property
    def group_by(self):
        if not self.section:
            self.section = self._get_content_type()
            self.save()

        return self.section

    def _get_content_type(self):
        if 'youtube.com' in self.url or 'vimeo.com' in self.url or 'escapistmagazine.com' in self.url:
            return _('Video')

        audio, video, other = 0, 0, 0
        for content_type in self.db.get_content_types(self.id):
            content_type = content_type.lower()
            if content_type.startswith('audio'):
                audio += 1
            elif content_type.startswith('video'):
                video += 1
            else:
                other += 1

        if audio >= video:
            return _('Audio')
        elif video > other:
            return _('Video')

        return _('Other')

    def authenticate_url(self, url):
        return util.url_add_authentication(url, self.auth_username, self.auth_password)

    def rename(self, new_title):
        new_title = new_title.strip()
        if self.title == new_title:
            return

        new_folder_name = self.find_unique_folder_name(new_title)
        if new_folder_name and new_folder_name != self.download_folder:
            new_folder = os.path.join(gpodder.downloads, new_folder_name)
            old_folder = os.path.join(gpodder.downloads, self.download_folder)
            if os.path.exists(old_folder):
                if not os.path.exists(new_folder):
                    # Old folder exists, new folder does not -> simply rename
                    logger.info('Renaming %s => %s', old_folder, new_folder)
                    os.rename(old_folder, new_folder)
                else:
                    # Both folders exist -> move files and delete old folder
                    logger.info('Moving files from %s to %s', old_folder,
                            new_folder)
                    for file in glob.glob(os.path.join(old_folder, '*')):
                        shutil.move(file, new_folder)
                    logger.info('Removing %s', old_folder)
                    shutil.rmtree(old_folder, ignore_errors=True)
            self.download_folder = new_folder_name

        self.title = new_title
        self.save()

    def _determine_common_prefix(self):
        # We need at least 2 episodes for the prefix to be "common" ;)
        if len(self.children) < 2:
            self._common_prefix = ''
            return

        prefix = os.path.commonprefix([x.title for x in self.children])
        # The common prefix must end with a space - otherwise it's not
        # on a word boundary, and we might end up chopping off too much
        if prefix and prefix[-1] != ' ':
            prefix = prefix[:prefix.rfind(' ') + 1]

        self._common_prefix = prefix

    def get_all_episodes(self):
        return self.children

    def get_episodes(self, state):
        return [e for e in self.get_all_episodes() if e.state == state]

    def find_unique_folder_name(self, download_folder):
        # Remove trailing dots to avoid errors on Windows (bug 600)
        # Also remove leading dots to avoid hidden folders on Linux
        download_folder = download_folder.strip('.' + string.whitespace)

        for folder_name in util.generate_names(download_folder):
            if (not self.db.podcast_download_folder_exists(folder_name) or
                    self.download_folder == folder_name):
                return folder_name

    def get_save_dir(self, force_new=False):
        if self.download_folder is None or force_new:
            # we must change the folder name, because it has not been set manually
            fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)

            if not fn_template:
                fn_template = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)

            # Find a unique folder name for this podcast
            download_folder = self.find_unique_folder_name(fn_template)

            # Try removing the download folder if it has been created previously
            if self.download_folder is not None:
                folder = os.path.join(gpodder.downloads, self.download_folder)
                try:
                    os.rmdir(folder)
                except OSError:
                    logger.info('Old download folder is kept for %s', self.url)

            logger.info('Updating download_folder of %s to %s', self.url,
                    download_folder)
            self.download_folder = download_folder
            self.save()

        save_dir = os.path.join(gpodder.downloads, self.download_folder)

        # Create save_dir if it does not yet exist
        if not util.make_directory(save_dir):
            logger.error('Could not create save_dir: %s', save_dir)

        return save_dir

    save_dir = property(fget=get_save_dir)

    def remove_downloaded(self):
        # Remove the download directory
        for episode in self.get_episodes(gpodder.STATE_DOWNLOADED):
            filename = episode.local_filename(create=False, check_only=True)
            if filename is not None:
                gpodder.user_extensions.on_episode_delete(episode, filename)

        shutil.rmtree(self.save_dir, True)

    @property
    def cover_file(self):
        return os.path.join(self.save_dir, 'folder')


class Model(object):
    PodcastClass = PodcastChannel

    def __init__(self, db):
        self.db = db
        self.children = None

    def _append_podcast(self, podcast):
        if podcast not in self.children:
            self.children.append(podcast)

    def _remove_podcast(self, podcast):
        self.children.remove(podcast)
        gpodder.user_extensions.on_podcast_delete(self)

    def get_podcasts(self):
        def podcast_factory(dct, db):
            return self.PodcastClass.create_from_dict(dct, self, dct['id'])

        if self.children is None:
            self.children = self.db.load_podcasts(podcast_factory)

            # Check download folders for changes (bug 902)
            for podcast in self.children:
                podcast.check_download_folder()

        return self.children

    def get_podcast(self, url):
        for p in self.get_podcasts():
            if p.url == url:
                return p
        return None

    def load_podcast(self, url, create=True, authentication_tokens=None,
            max_episodes=0):
        assert all(url != podcast.url for podcast in self.get_podcasts())
        return self.PodcastClass.load(self, url, create,
                authentication_tokens,
                max_episodes)
, podcast
):
1383 return cls
.PodcastClass
.sort_key(podcast
)
1386 def episode_sort_key(cls
, episode
):
1387 return episode
.published
1390 def sort_episodes_by_pubdate(cls
, episodes
, reverse
=False):
1391 """Sort a list of PodcastEpisode objects chronologically
1393 Returns a iterable, sorted sequence of the episodes
1395 return sorted(episodes
, key
=cls
.episode_sort_key
, reverse
=reverse
)
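
    # Typical use (illustration only):
    #     Model.sort_episodes_by_pubdate(podcast.get_all_episodes(), reverse=True)
    # returns the episodes newest-first.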


def check_root_folder_path():
    root = gpodder.home
    if gpodder.ui.win32:
        longest = len(root) \
            + 1 + PodcastChannel.MAX_FOLDERNAME_LENGTH \
            + 1 + PodcastEpisode.MAX_FILENAME_WITH_EXT_LENGTH
        if longest > 260:
            return _("Warning: path to gPodder home (%(root)s) is very long "
                    "and can result in failure to download files.\n" % {"root": root}) \
                + _("You're advised to set it to a shorter path.")