1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # gpodder.model - Core model classes for gPodder (2009-08-13)
23 # Based on libpodcasts.py (thp, 2005-10-29)
# NOTE(review): the import section of this chunk is garbled and several
# original lines are missing from view.  The stdlib modules below are the
# ones the visible code actually references (os, re, glob, shutil, time,
# rfc822, hashlib, feedparser) -- confirm against upstream gpodder/model.py.
import glob
import hashlib
import os
import re
import rfc822
import shutil
import time
import xml.sax.saxutils

import feedparser

import gpodder
from gpodder import corestats
from gpodder import feedcore
from gpodder import gstreamer
from gpodder import util
from gpodder import youtube
from gpodder.liblogger import log
class CustomFeed(feedcore.ExceptionWithData):
    """Raised by a custom URL handler to deliver feed content directly."""
    pass
class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder and provides a
    convenience method (fetch_channel) for use by PodcastChannel objects.
    """
    # Registered custom URL handlers (shared across all instances)
    custom_handlers = []

    def __init__(self):
        # NOTE(review): the "def __init__" line was missing from the garbled
        # source; reconstructed from the visible Fetcher.__init__ call.
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        """Fetch a channel's feed, honoring etag/last-modified caching."""
        etag = channel.etag
        modified = feedparser._parse_date(channel.last_modified)
        # If we have a username or password, rebuild the url with them included
        # Note: using a HTTPBasicAuthHandler would be pain because we need to
        # know the realm. It can be done, but I think this method works, too
        url = channel.authenticate_url(channel.url)
        for handler in self.custom_handlers:
            custom_feed = handler.handle_url(url)
            if custom_feed is not None:
                # A custom handler claims this URL; signal via exception
                raise CustomFeed(custom_feed)
        self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        # Resolve YouTube user pages to their real feed URL
        return youtube.get_real_channel_url(url)

    @classmethod
    def register(cls, handler):
        """Register a custom URL handler object (must provide handle_url)."""
        cls.custom_handlers.append(handler)

    # def _get_handlers(self):
    #     # Add a ProxyHandler for fetching data via a proxy server
    #     proxies = {'http': 'http://proxy.example.org:8080'}
    #     return[urllib2.ProxyHandler(proxies))]

# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register
class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        o = cls(*args)
        o.update_from_dict(d)
        return o

    def update_from_dict(self, d):
        """
        Updates the attributes of this object with values from the
        dictionary "d" by using the keys found in "d".
        """
        for k in d:
            # Only overwrite attributes that already exist on the object
            if hasattr(self, k):
                setattr(self, k, d[k])
class PodcastChannel(PodcastModelObject):
    """holds data for a complete channel"""
    # Maximum length of a sanitized on-disk folder name
    MAX_FOLDERNAME_LENGTH = 150
    SECONDS_PER_WEEK = 7*24*60*60

    # Shared fetcher instance used by update()
    feed_fetcher = gPodderFetcher()
124 def build_factory(cls
, download_dir
):
125 def factory(dict, db
):
126 return cls
.create_from_dict(dict, db
, download_dir
)
130 def load_from_db(cls
, db
, download_dir
):
131 return db
.load_channels(factory
=cls
.build_factory(download_dir
))
134 def load(cls
, db
, url
, create
=True, authentication_tokens
=None,\
135 max_episodes
=0, download_dir
=None, allow_empty_feeds
=False, \
137 if isinstance(url
, unicode):
138 url
= url
.encode('utf-8')
140 tmp
= db
.load_channels(factory
=cls
.build_factory(download_dir
), url
=url
)
144 tmp
= PodcastChannel(db
, download_dir
)
146 if authentication_tokens
is not None:
147 tmp
.username
= authentication_tokens
[0]
148 tmp
.password
= authentication_tokens
[1]
150 tmp
.update(max_episodes
, mimetype_prefs
)
152 db
.force_last_new(tmp
)
153 # Subscribing to empty feeds should yield an error (except if
154 # the user specifically allows empty feeds in the config UI)
155 if sum(tmp
.get_statistics()) == 0 and not allow_empty_feeds
:
157 raise Exception(_('No downloadable episodes in feed'))
160 def episode_factory(self
, d
, db__parameter_is_unused
=None):
162 This function takes a dictionary containing key-value pairs for
163 episodes and returns a new PodcastEpisode object that is connected
164 to this PodcastChannel object.
166 Returns: A new PodcastEpisode object
168 return PodcastEpisode
.create_from_dict(d
, self
)
170 def _consume_custom_feed(self
, custom_feed
, max_episodes
=0):
171 self
.title
= custom_feed
.get_title()
172 self
.link
= custom_feed
.get_link()
173 self
.description
= custom_feed
.get_description()
174 self
.image
= custom_feed
.get_image()
175 self
.pubDate
= time
.time()
178 guids
= [episode
.guid
for episode
in self
.get_all_episodes()]
180 # Insert newly-found episodes into the database
181 custom_feed
.get_new_episodes(self
, guids
)
185 self
.db
.purge(max_episodes
, self
.id)
187 def _consume_updated_feed(self
, feed
, max_episodes
=0, mimetype_prefs
=''):
188 self
.parse_error
= feed
.get('bozo_exception', None)
190 # Replace multi-space and newlines with single space (Maemo bug 11173)
191 self
.title
= re
.sub('\s+', ' ', feed
.feed
.get('title', self
.url
))
193 self
.link
= feed
.feed
.get('link', self
.link
)
194 self
.description
= feed
.feed
.get('subtitle', self
.description
)
195 # Start YouTube-specific title FIX
196 YOUTUBE_PREFIX
= 'Uploads by '
197 if self
.title
.startswith(YOUTUBE_PREFIX
):
198 self
.title
= self
.title
[len(YOUTUBE_PREFIX
):] + ' on YouTube'
199 # End YouTube-specific title FIX
202 self
.pubDate
= rfc822
.mktime_tz(feed
.feed
.get('updated_parsed', None+(0,)))
204 self
.pubDate
= time
.time()
206 if hasattr(feed
.feed
, 'image'):
207 for attribute
in ('href', 'url'):
208 new_value
= getattr(feed
.feed
.image
, attribute
, None)
209 if new_value
is not None:
210 log('Found cover art in %s: %s', attribute
, new_value
)
211 self
.image
= new_value
213 if hasattr(feed
.feed
, 'icon'):
214 self
.image
= feed
.feed
.icon
218 # Load all episodes to update them properly.
219 existing
= self
.get_all_episodes()
221 # We can limit the maximum number of entries that gPodder will parse
222 if max_episodes
> 0 and len(feed
.entries
) > max_episodes
:
223 # We have to sort the entries in descending chronological order,
224 # because if the feed lists items in ascending order and has >
225 # max_episodes old episodes, new episodes will not be shown.
226 # See also: gPodder Bug 1186
228 entries
= sorted(feed
.entries
, \
229 key
=lambda x
: x
.get('updated_parsed', (0,)*9), \
230 reverse
=True)[:max_episodes
]
232 log('Could not sort episodes: %s', e
, sender
=self
, traceback
=True)
233 entries
= feed
.entries
[:max_episodes
]
235 entries
= feed
.entries
237 # Title + PubDate hashes for existing episodes
238 existing_dupes
= dict((e
.duplicate_id(), e
) for e
in existing
)
240 # GUID-based existing episode list
241 existing_guids
= dict((e
.guid
, e
) for e
in existing
)
243 # Get most recent pubDate of all episodes
244 last_pubdate
= self
.db
.get_last_pubdate(self
) or 0
246 # Search all entries for new episodes
247 for entry
in entries
:
249 episode
= PodcastEpisode
.from_feedparser_entry(entry
, self
, mimetype_prefs
)
250 if episode
is not None and not episode
.title
:
251 episode
.title
, ext
= os
.path
.splitext(os
.path
.basename(episode
.url
))
253 log('Cannot instantiate episode: %s. Skipping.', e
, sender
=self
, traceback
=True)
259 # Detect (and update) existing episode based on GUIDs
260 existing_episode
= existing_guids
.get(episode
.guid
, None)
262 existing_episode
.update_from(episode
)
263 existing_episode
.save()
266 # Detect (and update) existing episode based on duplicate ID
267 existing_episode
= existing_dupes
.get(episode
.duplicate_id(), None)
269 if existing_episode
.is_duplicate(episode
):
270 existing_episode
.update_from(episode
)
271 existing_episode
.save()
274 # Workaround for bug 340: If the episode has been
275 # published earlier than one week before the most
276 # recent existing episode, do not mark it as new.
277 if episode
.pubDate
< last_pubdate
- self
.SECONDS_PER_WEEK
:
278 log('Episode with old date: %s', episode
.title
, sender
=self
)
279 episode
.is_played
= True
283 # Remove "unreachable" episodes - episodes that have not been
284 # downloaded and that the feed does not list as downloadable anymore
285 if self
.id is not None:
286 seen_guids
= set(e
.guid
for e
in feed
.entries
if hasattr(e
, 'guid'))
287 episodes_to_purge
= (e
for e
in existing
if \
288 e
.state
!= gpodder
.STATE_DOWNLOADED
and \
289 e
.guid
not in seen_guids
and e
.guid
is not None)
290 for episode
in episodes_to_purge
:
291 log('Episode removed from feed: %s (%s)', episode
.title
, \
292 episode
.guid
, sender
=self
)
293 self
.db
.delete_episode_by_guid(episode
.guid
, self
.id)
295 # This *might* cause episodes to be skipped if there were more than
296 # max_episodes_per_feed items added to the feed between updates.
297 # The benefit is that it prevents old episodes from apearing as new
298 # in certain situations (see bug #340).
299 self
.db
.purge(max_episodes
, self
.id)
301 def update_channel_lock(self
):
302 self
.db
.update_channel_lock(self
)
304 def _update_etag_modified(self
, feed
):
305 self
.updated_timestamp
= time
.time()
306 self
.calculate_publish_behaviour()
307 self
.etag
= feed
.headers
.get('etag', self
.etag
)
308 self
.last_modified
= feed
.headers
.get('last-modified', self
.last_modified
)
310 def query_automatic_update(self
):
311 """Query if this channel should be updated automatically
313 Returns True if the update should happen in automatic
314 mode or False if this channel should be skipped (timeout
315 not yet reached or release not expected right now).
317 updated
= self
.updated_timestamp
318 expected
= self
.release_expected
321 one_day_ago
= now
- 60*60*24
322 lastcheck
= now
- 60*10
324 return updated
< one_day_ago
or \
325 (expected
< now
and updated
< lastcheck
)
327 def update(self
, max_episodes
=0, mimetype_prefs
=''):
329 self
.feed_fetcher
.fetch_channel(self
)
330 except CustomFeed
, updated
:
331 custom_feed
= updated
.data
332 self
._consume
_custom
_feed
(custom_feed
, max_episodes
)
334 except feedcore
.UpdatedFeed
, updated
:
336 self
._consume
_updated
_feed
(feed
, max_episodes
, mimetype_prefs
)
337 self
._update
_etag
_modified
(feed
)
339 except feedcore
.NewLocation
, updated
:
342 self
._consume
_updated
_feed
(feed
, max_episodes
, mimetype_prefs
)
343 self
._update
_etag
_modified
(feed
)
345 except feedcore
.NotModified
, updated
:
347 self
._update
_etag
_modified
(feed
)
350 # "Not really" errors
351 #feedcore.AuthenticationRequired
355 #feedcore.InternalServerError
358 #feedcore.Unsubscribe
360 #feedcore.InvalidFeed
361 #feedcore.UnknownStatusCode
364 if gpodder
.user_hooks
is not None:
365 gpodder
.user_hooks
.on_podcast_updated(self
)
370 self
.db
.delete_channel(self
)
373 if gpodder
.user_hooks
is not None:
374 gpodder
.user_hooks
.on_podcast_save(self
)
375 if self
.foldername
is None:
376 # get_save_dir() finds a unique value for foldername
378 self
.db
.save_channel(self
)
380 def get_statistics(self
):
382 return (0, 0, 0, 0, 0)
384 return self
.db
.get_channel_count(int(self
.id))
386 def authenticate_url(self
, url
):
387 return util
.url_add_authentication(url
, self
.username
, self
.password
)
389 def __init__(self
, db
, download_dir
):
391 self
.download_dir
= download_dir
396 self
.description
= ''
399 self
.parse_error
= None
400 self
.foldername
= None
401 self
.auto_foldername
= 1 # automatically generated foldername
403 # should this channel be synced to devices? (ex: iPod)
404 self
.sync_to_devices
= True
405 # to which playlist should be synced
406 self
.device_playlist_name
= 'gPodder'
407 # if set, this overrides the channel-provided title
408 self
.override_title
= ''
412 self
.last_modified
= None
415 self
.save_dir_size
= 0
416 self
.__save
_dir
_size
_set
= False
418 self
.channel_is_locked
= False
420 self
.release_expected
= time
.time()
421 self
.release_deviation
= 0
422 self
.updated_timestamp
= 0
423 self
.feed_update_enabled
= True
425 def calculate_publish_behaviour(self
):
426 episodes
= self
.db
.load_episodes(self
, factory
=self
.episode_factory
, limit
=30)
427 if len(episodes
) < 3:
431 latest
= max(e
.pubDate
for e
in episodes
)
432 for index
in range(len(episodes
)-1):
433 if episodes
[index
].pubDate
!= 0 and episodes
[index
+1].pubDate
!= 0:
434 deltas
.append(episodes
[index
].pubDate
- episodes
[index
+1].pubDate
)
437 stats
= corestats
.Stats(deltas
)
438 self
.release_expected
= min([latest
+stats
.stdev(), latest
+(stats
.min()+stats
.avg())*.5])
439 self
.release_deviation
= stats
.stdev()
441 self
.release_expected
= latest
442 self
.release_deviation
= 0
444 def request_save_dir_size(self
):
445 if not self
.__save
_dir
_size
_set
:
446 self
.update_save_dir_size()
447 self
.__save
_dir
_size
_set
= True
449 def update_save_dir_size(self
):
450 self
.save_dir_size
= util
.calculate_size(self
.save_dir
)
452 def get_title( self
):
453 if self
.override_title
:
454 return self
.override_title
455 elif not self
.__title
.strip():
460 def set_title( self
, value
):
461 self
.__title
= value
.strip()
463 title
= property(fget
=get_title
,
466 def set_custom_title( self
, custom_title
):
467 custom_title
= custom_title
.strip()
469 # if the custom title is the same as we have
470 if custom_title
== self
.override_title
:
473 # if custom title is the same as channel title and we didn't have a custom title
474 if custom_title
== self
.__title
and self
.override_title
== '':
477 # make sure self.foldername is initialized
480 # rename folder if custom_title looks sane
481 new_folder_name
= self
.find_unique_folder_name(custom_title
)
482 if len(new_folder_name
) > 0 and new_folder_name
!= self
.foldername
:
483 log('Changing foldername based on custom title: %s', custom_title
, sender
=self
)
484 new_folder
= os
.path
.join(self
.download_dir
, new_folder_name
)
485 old_folder
= os
.path
.join(self
.download_dir
, self
.foldername
)
486 if os
.path
.exists(old_folder
):
487 if not os
.path
.exists(new_folder
):
488 # Old folder exists, new folder does not -> simply rename
489 log('Renaming %s => %s', old_folder
, new_folder
, sender
=self
)
490 os
.rename(old_folder
, new_folder
)
492 # Both folders exist -> move files and delete old folder
493 log('Moving files from %s to %s', old_folder
, new_folder
, sender
=self
)
494 for file in glob
.glob(os
.path
.join(old_folder
, '*')):
495 shutil
.move(file, new_folder
)
496 log('Removing %s', old_folder
, sender
=self
)
497 shutil
.rmtree(old_folder
, ignore_errors
=True)
498 self
.foldername
= new_folder_name
501 if custom_title
!= self
.__title
:
502 self
.override_title
= custom_title
504 self
.override_title
= ''
506 def get_downloaded_episodes(self
):
507 return self
.db
.load_episodes(self
, factory
=self
.episode_factory
, state
=gpodder
.STATE_DOWNLOADED
)
509 def get_new_episodes(self
, downloading
=lambda e
: False):
511 Get a list of new episodes. You can optionally specify
512 "downloading" as a callback that takes an episode as
513 a parameter and returns True if the episode is currently
514 being downloaded or False if not.
516 By default, "downloading" is implemented so that it
517 reports all episodes as not downloading.
519 return [episode
for episode
in self
.db
.load_episodes(self
, \
520 factory
=self
.episode_factory
, state
=gpodder
.STATE_NORMAL
) if \
521 episode
.check_is_new(downloading
=downloading
)]
523 def get_playlist_filename(self
):
524 # If the save_dir doesn't end with a slash (which it really should
525 # not, if the implementation is correct, we can just append .m3u :)
526 assert self
.save_dir
[-1] != '/'
527 return self
.save_dir
+'.m3u'
529 def update_m3u_playlist(self
):
530 m3u_filename
= self
.get_playlist_filename()
532 downloaded_episodes
= self
.get_downloaded_episodes()
533 if not downloaded_episodes
:
534 log('No episodes - removing %s', m3u_filename
, sender
=self
)
535 util
.delete_file(m3u_filename
)
538 log('Writing playlist to %s', m3u_filename
, sender
=self
)
539 util
.write_m3u_playlist(m3u_filename
, \
540 PodcastEpisode
.sort_by_pubdate(downloaded_episodes
))
542 def get_episode_by_url(self
, url
):
543 return self
.db
.load_single_episode(self
, \
544 factory
=self
.episode_factory
, url
=url
)
546 def get_episode_by_filename(self
, filename
):
547 return self
.db
.load_single_episode(self
, \
548 factory
=self
.episode_factory
, filename
=filename
)
550 def get_all_episodes(self
):
551 return self
.db
.load_episodes(self
, factory
=self
.episode_factory
)
553 def find_unique_folder_name(self
, foldername
):
554 # Remove trailing dots to avoid errors on Windows (bug 600)
555 foldername
= foldername
.strip().rstrip('.')
557 current_try
= util
.sanitize_filename(foldername
, \
558 self
.MAX_FOLDERNAME_LENGTH
)
562 if self
.db
.channel_foldername_exists(current_try
):
563 current_try
= '%s (%d)' % (foldername
, next_try_id
)
568 def get_save_dir(self
):
569 urldigest
= hashlib
.md5(self
.url
).hexdigest()
570 sanitizedurl
= util
.sanitize_filename(self
.url
, self
.MAX_FOLDERNAME_LENGTH
)
571 if self
.foldername
is None or (self
.auto_foldername
and (self
.foldername
== urldigest
or self
.foldername
.startswith(sanitizedurl
))):
572 # we must change the folder name, because it has not been set manually
573 fn_template
= util
.sanitize_filename(self
.title
, self
.MAX_FOLDERNAME_LENGTH
)
575 # if this is an empty string, try the basename
576 if len(fn_template
) == 0:
577 log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self
.url
, sender
=self
)
578 fn_template
= util
.sanitize_filename(os
.path
.basename(self
.url
), self
.MAX_FOLDERNAME_LENGTH
)
580 # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
581 if len(fn_template
) == 0:
582 log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self
.url
, sender
=self
)
583 fn_template
= urldigest
# no need for sanitize_filename here
585 # Find a unique folder name for this podcast
586 wanted_foldername
= self
.find_unique_folder_name(fn_template
)
588 # if the foldername has not been set, check if the (old) md5 filename exists
589 if self
.foldername
is None and os
.path
.exists(os
.path
.join(self
.download_dir
, urldigest
)):
590 log('Found pre-0.15.0 download folder for %s: %s', self
.title
, urldigest
, sender
=self
)
591 self
.foldername
= urldigest
593 # we have a valid, new folder name in "current_try" -> use that!
594 if self
.foldername
is not None and wanted_foldername
!= self
.foldername
:
595 # there might be an old download folder crawling around - move it!
596 new_folder_name
= os
.path
.join(self
.download_dir
, wanted_foldername
)
597 old_folder_name
= os
.path
.join(self
.download_dir
, self
.foldername
)
598 if os
.path
.exists(old_folder_name
):
599 if not os
.path
.exists(new_folder_name
):
600 # Old folder exists, new folder does not -> simply rename
601 log('Renaming %s => %s', old_folder_name
, new_folder_name
, sender
=self
)
602 os
.rename(old_folder_name
, new_folder_name
)
604 # Both folders exist -> move files and delete old folder
605 log('Moving files from %s to %s', old_folder_name
, new_folder_name
, sender
=self
)
606 for file in glob
.glob(os
.path
.join(old_folder_name
, '*')):
607 shutil
.move(file, new_folder_name
)
608 log('Removing %s', old_folder_name
, sender
=self
)
609 shutil
.rmtree(old_folder_name
, ignore_errors
=True)
610 log('Updating foldername of %s to "%s".', self
.url
, wanted_foldername
, sender
=self
)
611 self
.foldername
= wanted_foldername
614 save_dir
= os
.path
.join(self
.download_dir
, self
.foldername
)
616 # Create save_dir if it does not yet exist
617 if not util
.make_directory( save_dir
):
618 log( 'Could not create save_dir: %s', save_dir
, sender
= self
)
622 save_dir
= property(fget
=get_save_dir
)
624 def remove_downloaded(self
):
625 # Remove the playlist file if it exists
626 m3u_filename
= self
.get_playlist_filename()
627 if os
.path
.exists(m3u_filename
):
628 util
.delete_file(m3u_filename
)
630 # Remove the download directory
631 shutil
.rmtree(self
.save_dir
, True)
634 def cover_file(self
):
635 new_name
= os
.path
.join(self
.save_dir
, 'folder.jpg')
636 if not os
.path
.exists(new_name
):
637 old_names
= ('cover', '.cover')
638 for old_name
in old_names
:
639 filename
= os
.path
.join(self
.save_dir
, old_name
)
640 if os
.path
.exists(filename
):
641 shutil
.move(filename
, new_name
)
646 def delete_episode(self
, episode
):
647 filename
= episode
.local_filename(create
=False, check_only
=True)
648 if filename
is not None:
649 util
.delete_file(filename
)
651 episode
.set_state(gpodder
.STATE_DELETED
)
class PodcastEpisode(PodcastModelObject):
    """holds data for one object in a channel"""
    # Maximum length of a sanitized on-disk file name
    MAX_FILENAME_LENGTH = 200
658 def _get_played(self
):
659 return self
.is_played
661 def _set_played(self
, played
):
662 self
.is_played
= played
664 # Alias "is_played" to "played" for DB column mapping
665 played
= property(fget
=_get_played
, fset
=_set_played
)
667 def _get_locked(self
):
668 return self
.is_locked
670 def _set_locked(self
, locked
):
671 self
.is_locked
= locked
673 # Alias "is_locked" to "locked" for DB column mapping
674 locked
= property(fget
=_get_locked
, fset
=_set_locked
)
676 def _get_channel_id(self
):
677 return self
.channel
.id
679 def _set_channel_id(self
, channel_id
):
680 assert self
.channel
.id == channel_id
682 # Accessor for the "channel_id" DB column
683 channel_id
= property(fget
=_get_channel_id
, fset
=_set_channel_id
)
686 def sort_by_pubdate(episodes
, reverse
=False):
687 """Sort a list of PodcastEpisode objects chronologically
689 Returns a iterable, sorted sequence of the episodes
691 key_pubdate
= lambda e
: e
.pubDate
692 return sorted(episodes
, key
=key_pubdate
, reverse
=reverse
)
694 def reload_from_db(self
):
696 Re-reads all episode details for this object from the
697 database and updates this object accordingly. Can be
698 used to refresh existing objects when the database has
699 been updated (e.g. the filename has been set after a
700 download where it was not set before the download)
702 d
= self
.db
.load_episode(self
.id)
703 self
.update_from_dict(d
or {})
706 def has_website_link(self
):
707 return bool(self
.link
) and (self
.link
!= self
.url
or \
708 youtube
.is_video_link(self
.link
))
711 def from_feedparser_entry(entry
, channel
, mimetype_prefs
=''):
712 episode
= PodcastEpisode(channel
)
714 # Replace multi-space and newlines with single space (Maemo bug 11173)
715 episode
.title
= re
.sub('\s+', ' ', entry
.get('title', ''))
716 episode
.link
= entry
.get('link', '')
717 if 'content' in entry
and len(entry
['content']) and \
718 entry
['content'][0].type == 'text/html':
719 episode
.description
= entry
['content'][0].value
721 episode
.description
= entry
.get('summary', '')
724 # Parse iTunes-specific podcast duration metadata
725 total_time
= util
.parse_time(entry
.get('itunes_duration', ''))
726 episode
.total_time
= total_time
730 # Fallback to subtitle if summary is not available0
731 if not episode
.description
:
732 episode
.description
= entry
.get('subtitle', '')
734 episode
.guid
= entry
.get('id', '')
735 if entry
.get('updated_parsed', None):
736 episode
.pubDate
= rfc822
.mktime_tz(entry
.updated_parsed
+(0,))
738 enclosures
= entry
.get('enclosures', ())
739 audio_available
= any(e
.get('type', '').startswith('audio/') \
741 video_available
= any(e
.get('type', '').startswith('video/') \
744 # Create the list of preferred mime types
745 mimetype_prefs
= mimetype_prefs
.split(',')
747 def calculate_preference_value(enclosure
):
748 """Calculate preference value of an enclosure
750 This is based on mime types and allows users to prefer
751 certain mime types over others (e.g. MP3 over AAC, ...)
753 mimetype
= enclosure
.get('type', None)
755 # If the mime type is found, return its (zero-based) index
756 return mimetype_prefs
.index(mimetype
)
758 # If it is not found, assume it comes after all listed items
759 return len(mimetype_prefs
)
762 for e
in sorted(enclosures
, key
=calculate_preference_value
):
763 episode
.mimetype
= e
.get('type', 'application/octet-stream')
764 if episode
.mimetype
== '':
765 # See Maemo bug 10036
766 log('Fixing empty mimetype in ugly feed', sender
=episode
)
767 episode
.mimetype
= 'application/octet-stream'
769 if '/' not in episode
.mimetype
:
772 # Skip images in feeds if audio or video is available (bug 979)
773 if episode
.mimetype
.startswith('image/') and \
774 (audio_available
or video_available
):
777 episode
.url
= util
.normalize_feed_url(e
.get('href', ''))
782 episode
.length
= int(e
.length
) or -1
789 for m
in entry
.get('media_content', ()):
790 episode
.mimetype
= m
.get('type', 'application/octet-stream')
791 if '/' not in episode
.mimetype
:
794 episode
.url
= util
.normalize_feed_url(m
.get('url', ''))
799 episode
.length
= int(m
.fileSize
) or -1
805 # Brute-force detection of any links
806 for l
in entry
.get('links', ()):
807 episode
.url
= util
.normalize_feed_url(l
.get('href', ''))
811 if youtube
.is_video_link(episode
.url
):
814 # Check if we can resolve this link to a audio/video file
815 filename
, extension
= util
.filename_from_url(episode
.url
)
816 file_type
= util
.file_type_by_extension(extension
)
817 if file_type
is None and hasattr(l
, 'type'):
818 extension
= util
.extension_from_mimetype(l
.type)
819 file_type
= util
.file_type_by_extension(extension
)
821 # The link points to a audio or video file - use it!
822 if file_type
is not None:
825 # Scan MP3 links in description text
826 mp3s
= re
.compile(r
'http://[^"]*\.mp3')
827 for content
in entry
.get('content', ()):
829 for match
in mp3s
.finditer(html
):
830 episode
.url
= match
.group(0)
835 def __init__(self
, channel
):
837 # Used by Storage for faster saving
842 self
.mimetype
= 'application/octet-stream'
844 self
.description
= ''
846 self
.channel
= channel
849 self
.auto_filename
= 1 # automatically generated filename
851 self
.state
= gpodder
.STATE_NORMAL
852 self
.is_played
= False
854 # Initialize the "is_locked" property
855 self
._is
_locked
= False
856 self
.is_locked
= channel
.channel_is_locked
860 self
.current_position
= 0
861 self
.current_position_updated
= 0
863 def get_is_locked(self
):
864 return self
._is
_locked
866 def set_is_locked(self
, is_locked
):
867 self
._is
_locked
= bool(is_locked
)
869 is_locked
= property(fget
=get_is_locked
, fset
=set_is_locked
)
872 if self
.state
!= gpodder
.STATE_DOWNLOADED
and self
.file_exists():
873 self
.state
= gpodder
.STATE_DOWNLOADED
874 if gpodder
.user_hooks
is not None:
875 gpodder
.user_hooks
.on_episode_save(self
)
876 self
.db
.save_episode(self
)
878 def on_downloaded(self
, filename
):
879 self
.state
= gpodder
.STATE_DOWNLOADED
880 self
.is_played
= False
881 self
.length
= os
.path
.getsize(filename
)
883 if not self
.total_time
:
885 length
= gstreamer
.get_track_length(filename
)
886 if length
is not None:
887 length
= int(length
/1000)
888 log('Detected media length: %d seconds', length
, \
890 self
.total_time
= length
891 self
.db
.save_episode(self
)
895 log('Error while detecting media length: %s', str(e
), \
898 self
.db
.save_downloaded_episode(self
)
901 def set_state(self
, state
):
903 self
.db
.update_episode_state(self
)
905 def mark(self
, state
=None, is_played
=None, is_locked
=None):
906 if state
is not None:
908 if is_played
is not None:
909 self
.is_played
= is_played
910 if is_locked
is not None:
911 self
.is_locked
= is_locked
912 self
.db
.update_episode_state(self
)
915 def title_markup(self
):
916 return '%s\n<small>%s</small>' % (xml
.sax
.saxutils
.escape(self
.title
),
917 xml
.sax
.saxutils
.escape(self
.channel
.title
))
920 def maemo_markup(self
):
922 length_str
= '%s; ' % self
.filesize_prop
925 return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
926 '; '+_('from %s')+'</small>') % (\
927 xml
.sax
.saxutils
.escape(re
.sub('\s+', ' ', self
.title
)), \
928 xml
.sax
.saxutils
.escape(length_str
), \
929 xml
.sax
.saxutils
.escape(self
.pubdate_prop
), \
930 xml
.sax
.saxutils
.escape(re
.sub('\s+', ' ', self
.channel
.title
)))
933 def maemo_remove_markup(self
):
934 if self
.total_time
and self
.current_position
:
935 played_string
= self
.get_play_info_string()
937 played_string
= _('played')
939 played_string
= _('unplayed')
940 downloaded_string
= self
.get_age_string()
941 if not downloaded_string
:
942 downloaded_string
= _('today')
943 return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
944 '; '+_('from %s')+'</small>') % (\
945 xml
.sax
.saxutils
.escape(self
.title
), \
946 xml
.sax
.saxutils
.escape(self
.filesize_prop
), \
947 xml
.sax
.saxutils
.escape(played_string
), \
948 xml
.sax
.saxutils
.escape(downloaded_string
), \
949 xml
.sax
.saxutils
.escape(self
.channel
.title
))
951 def age_in_days(self
):
952 return util
.file_age_in_days(self
.local_filename(create
=False, \
955 age_int_prop
= property(fget
=age_in_days
)
957 def get_age_string(self
):
958 return util
.file_age_to_string(self
.age_in_days())
960 age_prop
= property(fget
=get_age_string
)
962 def one_line_description( self
):
963 lines
= util
.remove_html_tags(self
.description
or '').strip().splitlines()
964 if not lines
or lines
[0] == '':
965 return _('No description available')
967 return ' '.join(lines
)
969 def delete_from_disk(self
):
971 self
.channel
.delete_episode(self
)
973 log('Cannot delete episode from disk: %s', self
.title
, traceback
=True, sender
=self
)
975 def find_unique_file_name(self
, url
, filename
, extension
):
976 current_try
= util
.sanitize_filename(filename
, self
.MAX_FILENAME_LENGTH
)+extension
980 if self
.filename
== current_try
and current_try
is not None:
981 # We already have this filename - good!
984 while self
.db
.episode_filename_exists(current_try
):
985 current_try
= '%s (%d)%s' % (filename
, next_try_id
, extension
)
def local_filename(self, create, force_update=False, check_only=False,
        template=None):
    """Get (and possibly generate) the local saving filename.

    Pass create=True if you want this function to generate a
    new filename if none exists; only do this when you are about
    to create/download the file afterwards.

    Normally, pass create=False. This will only recover a filename
    for files that already exist from a previous gPodder version
    (which used md5-based filenames). If neither file nor filename
    exists, None is returned.

    With force_update=True, this function tries to find a new
    (better) filename and moves the current file if one is found.
    Useful if more information about the file (e.g. the mimetype)
    becomes known during the download.

    With check_only=True, the file is never renamed, even if that
    would be reasonable - use it to merely check for existence.

    If "template" is given, it is used as the basis for generating
    the "real" filename.

    The generated filename is stored in the database for future access.
    """
    ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

    # For compatibility with already-downloaded episodes, we
    # have to know md5 filenames if they are downloaded already
    urldigest = hashlib.md5(self.url).hexdigest()

    if not create and self.filename is None:
        urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
        if os.path.exists(urldigest_filename):
            # The file exists, so set it up in our database
            log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
            self.filename = urldigest+ext
            self.auto_filename = 1
            self.save()
            return urldigest_filename
        return None

    # We only want to check if the file exists, so don't try to
    # rename the file, even if it would be reasonable. See also:
    # http://bugs.gpodder.org/attachment.cgi?id=236
    if check_only:
        if self.filename is None:
            return None
        return os.path.join(self.channel.save_dir, self.filename)

    if self.filename is None or force_update or \
            (self.auto_filename and self.filename == urldigest+ext):
        # Try to find a new filename for the current file
        if template is not None:
            # If template is specified, trust the template's extension
            episode_filename, ext = os.path.splitext(template)
        else:
            episode_filename, extension_UNUSED = util.filename_from_url(self.url)
        fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

        if 'redirect' in fn_template and template is None:
            # This looks like a redirection URL - force URL resolving!
            log('Looks like a redirection to me: %s', self.url, sender=self)
            url = util.get_real_url(self.channel.authenticate_url(self.url))
            log('Redirection resolved to: %s', url, sender=self)
            episode_filename, extension_UNUSED = util.filename_from_url(url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

        # Use the video title for YouTube downloads
        for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
            if self.url.startswith(yt_url):
                fn_template = util.sanitize_filename(os.path.basename(self.title), self.MAX_FILENAME_LENGTH)

        # If the basename is empty, use the md5 hexdigest of the URL
        if len(fn_template) == 0 or fn_template.startswith('redirect.'):
            log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
            fn_template = urldigest

        # Find a unique filename for this episode
        wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

        # We populate the filename field the first time - does the old file still exist?
        if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
            log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
            self.filename = urldigest+ext

        # The old file exists, but we have decided to want a different filename
        if self.filename is not None and wanted_filename != self.filename:
            # there might be an old download folder crawling around - move it!
            new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
            old_file_name = os.path.join(self.channel.save_dir, self.filename)
            if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                os.rename(old_file_name, new_file_name)
            elif force_update and not os.path.exists(old_file_name):
                # When we call force_update, the file might not yet exist when we
                # call it from the downloading code before saving the file
                log('Choosing new filename: %s', new_file_name, sender=self)
            else:
                log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
            log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
        elif self.filename is None:
            log('Setting filename to "%s".', wanted_filename, sender=self)
        else:
            log('Should update filename. Stays the same (%s). Good!',
                    wanted_filename, sender=self)
        self.filename = wanted_filename
        self.save()
        self.db.commit()

    return os.path.join(self.channel.save_dir, self.filename)
def set_mimetype(self, mimetype, commit=False):
    """Sets the mimetype for this episode"""
    self.mimetype = mimetype
    if commit:
        # Persist immediately when the caller asks for it
        self.db.commit()
def extension(self, may_call_local_filename=True):
    """Return the file extension for this episode (e.g. '.mp3').

    Prefers the extension of the local file (if one exists and
    may_call_local_filename is True), then the URL's extension,
    and finally falls back to the mimetype.
    """
    filename, ext = util.filename_from_url(self.url)
    if may_call_local_filename:
        filename = self.local_filename(create=False)
        if filename is not None:
            filename, ext = os.path.splitext(filename)
    # if we can't detect the extension from the url fallback on the mimetype
    if ext == '' or util.file_type_by_extension(ext) is None:
        ext = util.extension_from_mimetype(self.mimetype)
    return ext
def check_is_new(self, downloading=lambda e: False):
    """Return True if this episode is to be considered new.

    "downloading" should be a callback that gets an episode
    as its parameter and returns True if the episode is
    being downloaded at the moment.
    """
    return (self.state == gpodder.STATE_NORMAL and
            not self.is_played and
            not downloading(self))
def mark_new(self):
    # Reset the episode to the normal/unplayed state and persist it
    self.state = gpodder.STATE_NORMAL
    self.is_played = False
    self.db.update_episode_state(self)
def mark_old(self):
    # Mark the episode as played and persist the change
    self.is_played = True
    self.db.update_episode_state(self)
def file_exists(self):
    """Return True if the episode's local file exists on disk."""
    filename = self.local_filename(create=False, check_only=True)
    if filename is None:
        # No filename known -> nothing can exist
        return False
    return os.path.exists(filename)
def was_downloaded(self, and_exists=False):
    """Return True if this episode is in the downloaded state.

    With and_exists=True, additionally require that the
    downloaded file still exists on disk.
    """
    if self.state != gpodder.STATE_DOWNLOADED:
        return False
    if and_exists and not self.file_exists():
        return False
    return True
def sync_filename(self, use_custom=False, custom_format=None):
    """Return the display name used when syncing this episode.

    With use_custom=True, "custom_format" is applied as a string
    template with the episode and its podcast available; otherwise
    the plain episode title is returned.
    """
    if use_custom:
        return util.object_string_formatter(custom_format,
                episode=self, podcast=self.channel)
    return self.title
def file_type(self):
    """Return the media type of this episode (e.g. 'audio', 'video')."""
    # Assume all YouTube links are video files
    if youtube.is_video_link(self.url):
        return 'video'
    return util.file_type_by_extension(self.extension())
@property
def basename(self):
    """The base filename of the episode URL, without its extension."""
    name = os.path.basename(self.url)
    return os.path.splitext(name)[0]
@property
def published(self):
    """
    Returns published date as YYYYMMDD (or 00000000 if not available)
    """
    try:
        return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
    except Exception:
        # NOTE: was a bare "except:", which would also swallow
        # KeyboardInterrupt/SystemExit - narrow it to Exception
        log('Cannot format pubDate for "%s".', self.title, sender=self)
        return '00000000'
@property
def pubtime(self):
    """
    Returns published time as HHMM (or 0000 if not available)
    """
    try:
        return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
    except Exception:
        # NOTE: was a bare "except:", which would also swallow
        # KeyboardInterrupt/SystemExit - narrow it to Exception
        log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
        return '0000'
def playlist_title(self):
    """Return a title for this episode in a playlist

    The title is composed of the podcast name, the episode
    name and the publication date. The return value is the
    canonical representation of this episode in playlists
    (for example, M3U playlists).
    """
    return '%s - %s (%s)' % (self.channel.title,
            self.title,
            self.cute_pubdate())
def cute_pubdate(self):
    """Return a human-readable publication date string."""
    result = util.format_date(self.pubDate)
    if result is None:
        # No formattable date available
        return '(%s)' % _('unknown')
    return result
# Read-only property alias for the formatted publication date
pubdate_prop = property(fget=cute_pubdate)
def calculate_filesize(self):
    """Update self.length from the size of the downloaded file on disk."""
    filename = self.local_filename(create=False)
    if filename is None:
        # Fixed typo in the log message ("calculate_filesized")
        log('calculate_filesize called, but filename is None!', sender=self)
    try:
        self.length = os.path.getsize(filename)
    except Exception:
        # filename may be None or the file may be gone; was a bare
        # "except:" which would also swallow KeyboardInterrupt/SystemExit
        log('Could not get filesize for %s.', self.url)
def is_finished(self):
    """Return True if this episode is considered "finished playing"

    An episode is considered "finished" when there is a current
    position mark on the track, and the position is either past
    99 percent of the total time or inside the last 10 seconds.
    """
    if self.current_position <= 0:
        # No playback position recorded yet
        return False
    near_end = self.current_position + 10 >= self.total_time
    past_99_percent = self.current_position >= self.total_time * .99
    return near_end or past_99_percent
def get_play_info_string(self):
    """Return a short text describing playback progress/duration."""
    if self.is_finished():
        return '%s (%s)' % (_('Finished'), self.get_duration_string(),)
    if self.current_position > 0:
        # Partially played: show "position / duration"
        return '%s / %s' % (self.get_position_string(),
                self.get_duration_string())
    return self.get_duration_string()
def get_position_string(self):
    """Format the current playback position as a time string."""
    position = self.current_position
    return util.format_time(position)
def get_duration_string(self):
    """Format the total duration as a time string."""
    duration = self.total_time
    return util.format_time(duration)
def get_filesize_string(self):
    """Format the episode's file size for display."""
    size = self.length
    return util.format_filesize(size)
# Read-only property alias for the formatted file size
filesize_prop = property(fget=get_filesize_string)
def get_played_string(self):
    """Return _('Unplayed') for unplayed episodes, '' otherwise."""
    if not self.is_played:
        return _('Unplayed')
    return ''
# Read-only property alias for the played-state label
played_prop = property(fget=get_played_string)
def is_duplicate(self, episode):
    """Return True if this episode looks like a duplicate of "episode".

    Two episodes are considered duplicates when both their title
    and their publication date match.
    """
    if self.title == episode.title and self.pubDate == episode.pubDate:
        log('Possible duplicate detected: %s', self.title)
        return True
    return False
def duplicate_id(self):
    """Return a hash identifying potential duplicates (title, pubDate)."""
    key = (self.title, self.pubDate)
    return hash(key)
def update_from(self, episode):
    """Copy the feed-provided fields over from another episode object."""
    for field in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
        setattr(self, field, getattr(episode, field))