1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # gpodder.model - Core model classes for gPodder (2009-08-13)
23 # Based on libpodcasts.py (thp, 2005-10-29)
27 from gpodder
import util
28 from gpodder
import feedcore
29 from gpodder
import youtube
30 from gpodder
import corestats
31 from gpodder
import gstreamer
33 from gpodder
.liblogger
import log
44 import xml
.sax
.saxutils
class CustomFeed(feedcore.ExceptionWithData):
    """Raised by gPodderFetcher when a registered custom handler
    claims a feed URL; carries the handler's feed object as data."""
    pass
class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder and provides a
    convenience method (fetch_channel) for use by PodcastChannel objects.
    """
    # Shared registry of custom feed handlers. Restored: the definition
    # line was lost in this fragment, but both fetch_channel() (iterates)
    # and register() (appends) require a class-level list here.
    custom_handlers = []

    def __init__(self):
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        """Fetch the channel's feed, honouring etag/last-modified caching.

        Raises CustomFeed if a registered handler claims the URL.
        """
        # Restored: "etag" is passed to self.fetch() below but was never
        # assigned in this fragment; it mirrors "modified" next to it.
        etag = channel.etag
        modified = feedparser._parse_date(channel.last_modified)
        # If we have a username or password, rebuild the url with them included
        # Note: using a HTTPBasicAuthHandler would be pain because we need to
        # know the realm. It can be done, but I think this method works, too
        url = channel.authenticate_url(channel.url)
        for handler in self.custom_handlers:
            custom_feed = handler.handle_url(url)
            if custom_feed is not None:
                # A custom handler claims this URL; signal it via exception
                raise CustomFeed(custom_feed)
        self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        # Let the YouTube helper rewrite user/channel pages to real feed URLs
        return youtube.get_real_channel_url(url)

    @classmethod
    def register(cls, handler):
        # @classmethod restored: "register" is exported unbound below as
        # register_custom_handler = gPodderFetcher.register
        cls.custom_handlers.append(handler)
# def _get_handlers(self):
#     # Add a ProxyHandler for fetching data via a proxy server
#     proxies = {'http': 'http://proxy.example.org:8080'}
#     return [urllib2.ProxyHandler(proxies)]

# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register
class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        # Restored: construction and return lines were lost in this
        # fragment; without them the factory produced nothing.
        o = cls(*args)
        o.update_from_dict(d)
        return o

    def update_from_dict(self, d):
        """
        Updates the attributes of this object with values from the
        dictionary "d" by using the keys found in "d".

        Keys without a matching existing attribute are ignored, so
        stray dictionary entries cannot create new attributes.
        """
        for k in d:
            if hasattr(self, k):
                setattr(self, k, d[k])
116 class PodcastChannel(PodcastModelObject
):
117 """holds data for a complete channel"""
118 MAX_FOLDERNAME_LENGTH
= 150
119 SECONDS_PER_WEEK
= 7*24*60*60
121 feed_fetcher
= gPodderFetcher()
124 def build_factory(cls
, download_dir
):
125 def factory(dict, db
):
126 return cls
.create_from_dict(dict, db
, download_dir
)
130 def load_from_db(cls
, db
, download_dir
):
131 return db
.load_channels(factory
=cls
.build_factory(download_dir
))
134 def load(cls
, db
, url
, create
=True, authentication_tokens
=None,\
135 max_episodes
=0, download_dir
=None, allow_empty_feeds
=False, \
137 if isinstance(url
, unicode):
138 url
= url
.encode('utf-8')
140 tmp
= db
.load_channels(factory
=cls
.build_factory(download_dir
), url
=url
)
144 tmp
= PodcastChannel(db
, download_dir
)
146 if authentication_tokens
is not None:
147 tmp
.username
= authentication_tokens
[0]
148 tmp
.password
= authentication_tokens
[1]
150 tmp
.update(max_episodes
, mimetype_prefs
)
152 db
.force_last_new(tmp
)
153 # Subscribing to empty feeds should yield an error (except if
154 # the user specifically allows empty feeds in the config UI)
155 if sum(tmp
.get_statistics()) == 0 and not allow_empty_feeds
:
157 raise Exception(_('No downloadable episodes in feed'))
160 def episode_factory(self
, d
, db__parameter_is_unused
=None):
162 This function takes a dictionary containing key-value pairs for
163 episodes and returns a new PodcastEpisode object that is connected
164 to this PodcastChannel object.
166 Returns: A new PodcastEpisode object
168 return PodcastEpisode
.create_from_dict(d
, self
)
170 def _consume_custom_feed(self
, custom_feed
, max_episodes
=0):
171 self
.title
= custom_feed
.get_title()
172 self
.link
= custom_feed
.get_link()
173 self
.description
= custom_feed
.get_description()
174 self
.image
= custom_feed
.get_image()
175 self
.pubDate
= time
.time()
178 guids
= [episode
.guid
for episode
in self
.get_all_episodes()]
180 # Insert newly-found episodes into the database
181 custom_feed
.get_new_episodes(self
, guids
)
185 self
.db
.purge(max_episodes
, self
.id)
187 def _consume_updated_feed(self
, feed
, max_episodes
=0, mimetype_prefs
=''):
188 self
.parse_error
= feed
.get('bozo_exception', None)
190 # Replace multi-space and newlines with single space (Maemo bug 11173)
191 self
.title
= re
.sub('\s+', ' ', feed
.feed
.get('title', self
.url
))
193 self
.link
= feed
.feed
.get('link', self
.link
)
194 self
.description
= feed
.feed
.get('subtitle', self
.description
)
195 # Start YouTube-specific title FIX
196 YOUTUBE_PREFIX
= 'Uploads by '
197 if self
.title
.startswith(YOUTUBE_PREFIX
):
198 self
.title
= self
.title
[len(YOUTUBE_PREFIX
):] + ' on YouTube'
199 # End YouTube-specific title FIX
202 self
.pubDate
= rfc822
.mktime_tz(feed
.feed
.get('updated_parsed', None+(0,)))
204 self
.pubDate
= time
.time()
206 if hasattr(feed
.feed
, 'image'):
207 for attribute
in ('href', 'url'):
208 new_value
= getattr(feed
.feed
.image
, attribute
, None)
209 if new_value
is not None:
210 log('Found cover art in %s: %s', attribute
, new_value
)
211 self
.image
= new_value
213 if hasattr(feed
.feed
, 'icon'):
214 self
.image
= feed
.feed
.icon
218 # Load all episodes to update them properly.
219 existing
= self
.get_all_episodes()
221 # We can limit the maximum number of entries that gPodder will parse
222 if max_episodes
> 0 and len(feed
.entries
) > max_episodes
:
223 entries
= feed
.entries
[:max_episodes
]
225 entries
= feed
.entries
227 # Title + PubDate hashes for existing episodes
228 existing_dupes
= dict((e
.duplicate_id(), e
) for e
in existing
)
230 # GUID-based existing episode list
231 existing_guids
= dict((e
.guid
, e
) for e
in existing
)
233 # Get most recent pubDate of all episodes
234 last_pubdate
= self
.db
.get_last_pubdate(self
) or 0
236 # Search all entries for new episodes
237 for entry
in entries
:
239 episode
= PodcastEpisode
.from_feedparser_entry(entry
, self
, mimetype_prefs
)
240 if episode
is not None and not episode
.title
:
241 episode
.title
, ext
= os
.path
.splitext(os
.path
.basename(episode
.url
))
243 log('Cannot instantiate episode: %s. Skipping.', e
, sender
=self
, traceback
=True)
249 # Detect (and update) existing episode based on GUIDs
250 existing_episode
= existing_guids
.get(episode
.guid
, None)
252 existing_episode
.update_from(episode
)
253 existing_episode
.save()
256 # Detect (and update) existing episode based on duplicate ID
257 existing_episode
= existing_dupes
.get(episode
.duplicate_id(), None)
259 if existing_episode
.is_duplicate(episode
):
260 existing_episode
.update_from(episode
)
261 existing_episode
.save()
264 # Workaround for bug 340: If the episode has been
265 # published earlier than one week before the most
266 # recent existing episode, do not mark it as new.
267 if episode
.pubDate
< last_pubdate
- self
.SECONDS_PER_WEEK
:
268 log('Episode with old date: %s', episode
.title
, sender
=self
)
269 episode
.is_played
= True
273 # Remove "unreachable" episodes - episodes that have not been
274 # downloaded and that the feed does not list as downloadable anymore
275 if self
.id is not None:
276 seen_guids
= set(e
.guid
for e
in feed
.entries
if hasattr(e
, 'guid'))
277 episodes_to_purge
= (e
for e
in existing
if \
278 e
.state
!= gpodder
.STATE_DOWNLOADED
and \
279 e
.guid
not in seen_guids
and e
.guid
is not None)
280 for episode
in episodes_to_purge
:
281 log('Episode removed from feed: %s (%s)', episode
.title
, \
282 episode
.guid
, sender
=self
)
283 self
.db
.delete_episode_by_guid(episode
.guid
, self
.id)
285 # This *might* cause episodes to be skipped if there were more than
286 # max_episodes_per_feed items added to the feed between updates.
287 # The benefit is that it prevents old episodes from apearing as new
288 # in certain situations (see bug #340).
289 self
.db
.purge(max_episodes
, self
.id)
291 def update_channel_lock(self
):
292 self
.db
.update_channel_lock(self
)
294 def _update_etag_modified(self
, feed
):
295 self
.updated_timestamp
= time
.time()
296 self
.calculate_publish_behaviour()
297 self
.etag
= feed
.headers
.get('etag', self
.etag
)
298 self
.last_modified
= feed
.headers
.get('last-modified', self
.last_modified
)
300 def query_automatic_update(self
):
301 """Query if this channel should be updated automatically
303 Returns True if the update should happen in automatic
304 mode or False if this channel should be skipped (timeout
305 not yet reached or release not expected right now).
307 updated
= self
.updated_timestamp
308 expected
= self
.release_expected
311 one_day_ago
= now
- 60*60*24
312 lastcheck
= now
- 60*10
314 return updated
< one_day_ago
or \
315 (expected
< now
and updated
< lastcheck
)
317 def update(self
, max_episodes
=0, mimetype_prefs
=''):
319 self
.feed_fetcher
.fetch_channel(self
)
320 except CustomFeed
, updated
:
321 custom_feed
= updated
.data
322 self
._consume
_custom
_feed
(custom_feed
, max_episodes
)
324 except feedcore
.UpdatedFeed
, updated
:
326 self
._consume
_updated
_feed
(feed
, max_episodes
, mimetype_prefs
)
327 self
._update
_etag
_modified
(feed
)
329 except feedcore
.NewLocation
, updated
:
332 self
._consume
_updated
_feed
(feed
, max_episodes
, mimetype_prefs
)
333 self
._update
_etag
_modified
(feed
)
335 except feedcore
.NotModified
, updated
:
337 self
._update
_etag
_modified
(feed
)
340 # "Not really" errors
341 #feedcore.AuthenticationRequired
345 #feedcore.InternalServerError
348 #feedcore.Unsubscribe
350 #feedcore.InvalidFeed
351 #feedcore.UnknownStatusCode
354 if gpodder
.user_hooks
is not None:
355 gpodder
.user_hooks
.on_podcast_updated(self
)
360 self
.db
.delete_channel(self
)
363 if gpodder
.user_hooks
is not None:
364 gpodder
.user_hooks
.on_podcast_save(self
)
365 if self
.foldername
is None:
366 # get_save_dir() finds a unique value for foldername
368 self
.db
.save_channel(self
)
370 def get_statistics(self
):
372 return (0, 0, 0, 0, 0)
374 return self
.db
.get_channel_count(int(self
.id))
376 def authenticate_url(self
, url
):
377 return util
.url_add_authentication(url
, self
.username
, self
.password
)
379 def __init__(self
, db
, download_dir
):
381 self
.download_dir
= download_dir
386 self
.description
= ''
389 self
.parse_error
= None
390 self
.foldername
= None
391 self
.auto_foldername
= 1 # automatically generated foldername
393 # should this channel be synced to devices? (ex: iPod)
394 self
.sync_to_devices
= True
395 # to which playlist should be synced
396 self
.device_playlist_name
= 'gPodder'
397 # if set, this overrides the channel-provided title
398 self
.override_title
= ''
402 self
.last_modified
= None
405 self
.save_dir_size
= 0
406 self
.__save
_dir
_size
_set
= False
408 self
.channel_is_locked
= False
410 self
.release_expected
= time
.time()
411 self
.release_deviation
= 0
412 self
.updated_timestamp
= 0
413 self
.feed_update_enabled
= True
415 def calculate_publish_behaviour(self
):
416 episodes
= self
.db
.load_episodes(self
, factory
=self
.episode_factory
, limit
=30)
417 if len(episodes
) < 3:
421 latest
= max(e
.pubDate
for e
in episodes
)
422 for index
in range(len(episodes
)-1):
423 if episodes
[index
].pubDate
!= 0 and episodes
[index
+1].pubDate
!= 0:
424 deltas
.append(episodes
[index
].pubDate
- episodes
[index
+1].pubDate
)
427 stats
= corestats
.Stats(deltas
)
428 self
.release_expected
= min([latest
+stats
.stdev(), latest
+(stats
.min()+stats
.avg())*.5])
429 self
.release_deviation
= stats
.stdev()
431 self
.release_expected
= latest
432 self
.release_deviation
= 0
434 def request_save_dir_size(self
):
435 if not self
.__save
_dir
_size
_set
:
436 self
.update_save_dir_size()
437 self
.__save
_dir
_size
_set
= True
439 def update_save_dir_size(self
):
440 self
.save_dir_size
= util
.calculate_size(self
.save_dir
)
442 def get_title( self
):
443 if self
.override_title
:
444 return self
.override_title
445 elif not self
.__title
.strip():
450 def set_title( self
, value
):
451 self
.__title
= value
.strip()
453 title
= property(fget
=get_title
,
456 def set_custom_title( self
, custom_title
):
457 custom_title
= custom_title
.strip()
459 # if the custom title is the same as we have
460 if custom_title
== self
.override_title
:
463 # if custom title is the same as channel title and we didn't have a custom title
464 if custom_title
== self
.__title
and self
.override_title
== '':
467 # make sure self.foldername is initialized
470 # rename folder if custom_title looks sane
471 new_folder_name
= self
.find_unique_folder_name(custom_title
)
472 if len(new_folder_name
) > 0 and new_folder_name
!= self
.foldername
:
473 log('Changing foldername based on custom title: %s', custom_title
, sender
=self
)
474 new_folder
= os
.path
.join(self
.download_dir
, new_folder_name
)
475 old_folder
= os
.path
.join(self
.download_dir
, self
.foldername
)
476 if os
.path
.exists(old_folder
):
477 if not os
.path
.exists(new_folder
):
478 # Old folder exists, new folder does not -> simply rename
479 log('Renaming %s => %s', old_folder
, new_folder
, sender
=self
)
480 os
.rename(old_folder
, new_folder
)
482 # Both folders exist -> move files and delete old folder
483 log('Moving files from %s to %s', old_folder
, new_folder
, sender
=self
)
484 for file in glob
.glob(os
.path
.join(old_folder
, '*')):
485 shutil
.move(file, new_folder
)
486 log('Removing %s', old_folder
, sender
=self
)
487 shutil
.rmtree(old_folder
, ignore_errors
=True)
488 self
.foldername
= new_folder_name
491 if custom_title
!= self
.__title
:
492 self
.override_title
= custom_title
494 self
.override_title
= ''
496 def get_downloaded_episodes(self
):
497 return self
.db
.load_episodes(self
, factory
=self
.episode_factory
, state
=gpodder
.STATE_DOWNLOADED
)
499 def get_new_episodes(self
, downloading
=lambda e
: False):
501 Get a list of new episodes. You can optionally specify
502 "downloading" as a callback that takes an episode as
503 a parameter and returns True if the episode is currently
504 being downloaded or False if not.
506 By default, "downloading" is implemented so that it
507 reports all episodes as not downloading.
509 return [episode
for episode
in self
.db
.load_episodes(self
, \
510 factory
=self
.episode_factory
, state
=gpodder
.STATE_NORMAL
) if \
511 episode
.check_is_new(downloading
=downloading
)]
513 def get_playlist_filename(self
):
514 # If the save_dir doesn't end with a slash (which it really should
515 # not, if the implementation is correct, we can just append .m3u :)
516 assert self
.save_dir
[-1] != '/'
517 return self
.save_dir
+'.m3u'
519 def update_m3u_playlist(self
):
520 m3u_filename
= self
.get_playlist_filename()
522 downloaded_episodes
= self
.get_downloaded_episodes()
523 if not downloaded_episodes
:
524 log('No episodes - removing %s', m3u_filename
, sender
=self
)
525 util
.delete_file(m3u_filename
)
528 log('Writing playlist to %s', m3u_filename
, sender
=self
)
529 util
.write_m3u_playlist(m3u_filename
, \
530 PodcastEpisode
.sort_by_pubdate(downloaded_episodes
))
532 def get_episode_by_url(self
, url
):
533 return self
.db
.load_single_episode(self
, \
534 factory
=self
.episode_factory
, url
=url
)
536 def get_episode_by_filename(self
, filename
):
537 return self
.db
.load_single_episode(self
, \
538 factory
=self
.episode_factory
, filename
=filename
)
540 def get_all_episodes(self
):
541 return self
.db
.load_episodes(self
, factory
=self
.episode_factory
)
543 def find_unique_folder_name(self
, foldername
):
544 # Remove trailing dots to avoid errors on Windows (bug 600)
545 foldername
= foldername
.strip().rstrip('.')
547 current_try
= util
.sanitize_filename(foldername
, \
548 self
.MAX_FOLDERNAME_LENGTH
)
552 if self
.db
.channel_foldername_exists(current_try
):
553 current_try
= '%s (%d)' % (foldername
, next_try_id
)
558 def get_save_dir(self
):
559 urldigest
= hashlib
.md5(self
.url
).hexdigest()
560 sanitizedurl
= util
.sanitize_filename(self
.url
, self
.MAX_FOLDERNAME_LENGTH
)
561 if self
.foldername
is None or (self
.auto_foldername
and (self
.foldername
== urldigest
or self
.foldername
.startswith(sanitizedurl
))):
562 # we must change the folder name, because it has not been set manually
563 fn_template
= util
.sanitize_filename(self
.title
, self
.MAX_FOLDERNAME_LENGTH
)
565 # if this is an empty string, try the basename
566 if len(fn_template
) == 0:
567 log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self
.url
, sender
=self
)
568 fn_template
= util
.sanitize_filename(os
.path
.basename(self
.url
), self
.MAX_FOLDERNAME_LENGTH
)
570 # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
571 if len(fn_template
) == 0:
572 log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self
.url
, sender
=self
)
573 fn_template
= urldigest
# no need for sanitize_filename here
575 # Find a unique folder name for this podcast
576 wanted_foldername
= self
.find_unique_folder_name(fn_template
)
578 # if the foldername has not been set, check if the (old) md5 filename exists
579 if self
.foldername
is None and os
.path
.exists(os
.path
.join(self
.download_dir
, urldigest
)):
580 log('Found pre-0.15.0 download folder for %s: %s', self
.title
, urldigest
, sender
=self
)
581 self
.foldername
= urldigest
583 # we have a valid, new folder name in "current_try" -> use that!
584 if self
.foldername
is not None and wanted_foldername
!= self
.foldername
:
585 # there might be an old download folder crawling around - move it!
586 new_folder_name
= os
.path
.join(self
.download_dir
, wanted_foldername
)
587 old_folder_name
= os
.path
.join(self
.download_dir
, self
.foldername
)
588 if os
.path
.exists(old_folder_name
):
589 if not os
.path
.exists(new_folder_name
):
590 # Old folder exists, new folder does not -> simply rename
591 log('Renaming %s => %s', old_folder_name
, new_folder_name
, sender
=self
)
592 os
.rename(old_folder_name
, new_folder_name
)
594 # Both folders exist -> move files and delete old folder
595 log('Moving files from %s to %s', old_folder_name
, new_folder_name
, sender
=self
)
596 for file in glob
.glob(os
.path
.join(old_folder_name
, '*')):
597 shutil
.move(file, new_folder_name
)
598 log('Removing %s', old_folder_name
, sender
=self
)
599 shutil
.rmtree(old_folder_name
, ignore_errors
=True)
600 log('Updating foldername of %s to "%s".', self
.url
, wanted_foldername
, sender
=self
)
601 self
.foldername
= wanted_foldername
604 save_dir
= os
.path
.join(self
.download_dir
, self
.foldername
)
606 # Create save_dir if it does not yet exist
607 if not util
.make_directory( save_dir
):
608 log( 'Could not create save_dir: %s', save_dir
, sender
= self
)
612 save_dir
= property(fget
=get_save_dir
)
614 def remove_downloaded(self
):
615 # Remove the playlist file if it exists
616 m3u_filename
= self
.get_playlist_filename()
617 if os
.path
.exists(m3u_filename
):
618 util
.delete_file(m3u_filename
)
620 # Remove the download directory
621 shutil
.rmtree(self
.save_dir
, True)
624 def cover_file(self
):
625 new_name
= os
.path
.join(self
.save_dir
, 'folder.jpg')
626 if not os
.path
.exists(new_name
):
627 old_names
= ('cover', '.cover')
628 for old_name
in old_names
:
629 filename
= os
.path
.join(self
.save_dir
, old_name
)
630 if os
.path
.exists(filename
):
631 shutil
.move(filename
, new_name
)
636 def delete_episode(self
, episode
):
637 filename
= episode
.local_filename(create
=False, check_only
=True)
638 if filename
is not None:
639 util
.delete_file(filename
)
641 episode
.set_state(gpodder
.STATE_DELETED
)
644 class PodcastEpisode(PodcastModelObject
):
645 """holds data for one object in a channel"""
646 MAX_FILENAME_LENGTH
= 200
648 def _get_played(self
):
649 return self
.is_played
651 def _set_played(self
, played
):
652 self
.is_played
= played
654 # Alias "is_played" to "played" for DB column mapping
655 played
= property(fget
=_get_played
, fset
=_set_played
)
657 def _get_locked(self
):
658 return self
.is_locked
660 def _set_locked(self
, locked
):
661 self
.is_locked
= locked
663 # Alias "is_locked" to "locked" for DB column mapping
664 locked
= property(fget
=_get_locked
, fset
=_set_locked
)
666 def _get_channel_id(self
):
667 return self
.channel
.id
669 def _set_channel_id(self
, channel_id
):
670 assert self
.channel
.id == channel_id
672 # Accessor for the "channel_id" DB column
673 channel_id
= property(fget
=_get_channel_id
, fset
=_set_channel_id
)
676 def sort_by_pubdate(episodes
, reverse
=False):
677 """Sort a list of PodcastEpisode objects chronologically
679 Returns a iterable, sorted sequence of the episodes
681 key_pubdate
= lambda e
: e
.pubDate
682 return sorted(episodes
, key
=key_pubdate
, reverse
=reverse
)
684 def reload_from_db(self
):
686 Re-reads all episode details for this object from the
687 database and updates this object accordingly. Can be
688 used to refresh existing objects when the database has
689 been updated (e.g. the filename has been set after a
690 download where it was not set before the download)
692 d
= self
.db
.load_episode(self
.id)
693 self
.update_from_dict(d
or {})
696 def has_website_link(self
):
697 return bool(self
.link
) and (self
.link
!= self
.url
or \
698 youtube
.is_video_link(self
.link
))
701 def from_feedparser_entry(entry
, channel
, mimetype_prefs
=''):
702 episode
= PodcastEpisode(channel
)
704 # Replace multi-space and newlines with single space (Maemo bug 11173)
705 episode
.title
= re
.sub('\s+', ' ', entry
.get('title', ''))
706 episode
.link
= entry
.get('link', '')
707 if 'content' in entry
and len(entry
['content']) and \
708 entry
['content'][0].type == 'text/html':
709 episode
.description
= entry
['content'][0].value
711 episode
.description
= entry
.get('summary', '')
714 # Parse iTunes-specific podcast duration metadata
715 total_time
= util
.parse_time(entry
.get('itunes_duration', ''))
716 episode
.total_time
= total_time
720 # Fallback to subtitle if summary is not available0
721 if not episode
.description
:
722 episode
.description
= entry
.get('subtitle', '')
724 episode
.guid
= entry
.get('id', '')
725 if entry
.get('updated_parsed', None):
726 episode
.pubDate
= rfc822
.mktime_tz(entry
.updated_parsed
+(0,))
728 enclosures
= entry
.get('enclosures', ())
729 audio_available
= any(e
.get('type', '').startswith('audio/') \
731 video_available
= any(e
.get('type', '').startswith('video/') \
734 # Create the list of preferred mime types
735 mimetype_prefs
= mimetype_prefs
.split(',')
737 def calculate_preference_value(enclosure
):
738 """Calculate preference value of an enclosure
740 This is based on mime types and allows users to prefer
741 certain mime types over others (e.g. MP3 over AAC, ...)
743 mimetype
= enclosure
.get('type', None)
745 # If the mime type is found, return its (zero-based) index
746 return mimetype_prefs
.index(mimetype
)
748 # If it is not found, assume it comes after all listed items
749 return len(mimetype_prefs
)
752 for e
in sorted(enclosures
, key
=calculate_preference_value
):
753 episode
.mimetype
= e
.get('type', 'application/octet-stream')
754 if episode
.mimetype
== '':
755 # See Maemo bug 10036
756 log('Fixing empty mimetype in ugly feed', sender
=episode
)
757 episode
.mimetype
= 'application/octet-stream'
759 if '/' not in episode
.mimetype
:
762 # Skip images in feeds if audio or video is available (bug 979)
763 if episode
.mimetype
.startswith('image/') and \
764 (audio_available
or video_available
):
767 episode
.url
= util
.normalize_feed_url(e
.get('href', ''))
772 episode
.length
= int(e
.length
) or -1
779 for m
in entry
.get('media_content', ()):
780 episode
.mimetype
= m
.get('type', 'application/octet-stream')
781 if '/' not in episode
.mimetype
:
784 episode
.url
= util
.normalize_feed_url(m
.get('url', ''))
789 episode
.length
= int(m
.fileSize
) or -1
795 # Brute-force detection of any links
796 for l
in entry
.get('links', ()):
797 episode
.url
= util
.normalize_feed_url(l
.get('href', ''))
801 if youtube
.is_video_link(episode
.url
):
804 # Check if we can resolve this link to a audio/video file
805 filename
, extension
= util
.filename_from_url(episode
.url
)
806 file_type
= util
.file_type_by_extension(extension
)
807 if file_type
is None and hasattr(l
, 'type'):
808 extension
= util
.extension_from_mimetype(l
.type)
809 file_type
= util
.file_type_by_extension(extension
)
811 # The link points to a audio or video file - use it!
812 if file_type
is not None:
815 # Scan MP3 links in description text
816 mp3s
= re
.compile(r
'http://[^"]*\.mp3')
817 for content
in entry
.get('content', ()):
819 for match
in mp3s
.finditer(html
):
820 episode
.url
= match
.group(0)
825 def __init__(self
, channel
):
827 # Used by Storage for faster saving
832 self
.mimetype
= 'application/octet-stream'
834 self
.description
= ''
836 self
.channel
= channel
839 self
.auto_filename
= 1 # automatically generated filename
841 self
.state
= gpodder
.STATE_NORMAL
842 self
.is_played
= False
844 # Initialize the "is_locked" property
845 self
._is
_locked
= False
846 self
.is_locked
= channel
.channel_is_locked
850 self
.current_position
= 0
851 self
.current_position_updated
= 0
853 def get_is_locked(self
):
854 return self
._is
_locked
856 def set_is_locked(self
, is_locked
):
857 self
._is
_locked
= bool(is_locked
)
859 is_locked
= property(fget
=get_is_locked
, fset
=set_is_locked
)
862 if self
.state
!= gpodder
.STATE_DOWNLOADED
and self
.file_exists():
863 self
.state
= gpodder
.STATE_DOWNLOADED
864 if gpodder
.user_hooks
is not None:
865 gpodder
.user_hooks
.on_episode_save(self
)
866 self
.db
.save_episode(self
)
868 def on_downloaded(self
, filename
):
869 self
.state
= gpodder
.STATE_DOWNLOADED
870 self
.is_played
= False
871 self
.length
= os
.path
.getsize(filename
)
873 if not self
.total_time
:
875 length
= gstreamer
.get_track_length(filename
)
876 if length
is not None:
877 length
= int(length
/1000)
878 log('Detected media length: %d seconds', length
, \
880 self
.total_time
= length
881 self
.db
.save_episode(self
)
885 log('Error while detecting media length: %s', str(e
), \
888 self
.db
.save_downloaded_episode(self
)
891 def set_state(self
, state
):
893 self
.db
.update_episode_state(self
)
895 def mark(self
, state
=None, is_played
=None, is_locked
=None):
896 if state
is not None:
898 if is_played
is not None:
899 self
.is_played
= is_played
900 if is_locked
is not None:
901 self
.is_locked
= is_locked
902 self
.db
.update_episode_state(self
)
905 def title_markup(self
):
906 return '%s\n<small>%s</small>' % (xml
.sax
.saxutils
.escape(self
.title
),
907 xml
.sax
.saxutils
.escape(self
.channel
.title
))
910 def maemo_markup(self
):
912 length_str
= '%s; ' % self
.filesize_prop
915 return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
916 '; '+_('from %s')+'</small>') % (\
917 xml
.sax
.saxutils
.escape(re
.sub('\s+', ' ', self
.title
)), \
918 xml
.sax
.saxutils
.escape(length_str
), \
919 xml
.sax
.saxutils
.escape(self
.pubdate_prop
), \
920 xml
.sax
.saxutils
.escape(re
.sub('\s+', ' ', self
.channel
.title
)))
923 def maemo_remove_markup(self
):
925 played_string
= _('played')
927 played_string
= _('unplayed')
928 downloaded_string
= self
.get_age_string()
929 if not downloaded_string
:
930 downloaded_string
= _('today')
931 return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
932 '; '+_('from %s')+'</small>') % (\
933 xml
.sax
.saxutils
.escape(self
.title
), \
934 xml
.sax
.saxutils
.escape(self
.filesize_prop
), \
935 xml
.sax
.saxutils
.escape(played_string
), \
936 xml
.sax
.saxutils
.escape(downloaded_string
), \
937 xml
.sax
.saxutils
.escape(self
.channel
.title
))
939 def age_in_days(self
):
940 return util
.file_age_in_days(self
.local_filename(create
=False, \
943 age_int_prop
= property(fget
=age_in_days
)
945 def get_age_string(self
):
946 return util
.file_age_to_string(self
.age_in_days())
948 age_prop
= property(fget
=get_age_string
)
950 def one_line_description( self
):
951 lines
= util
.remove_html_tags(self
.description
or '').strip().splitlines()
952 if not lines
or lines
[0] == '':
953 return _('No description available')
955 return ' '.join(lines
)
957 def delete_from_disk(self
):
959 self
.channel
.delete_episode(self
)
961 log('Cannot delete episode from disk: %s', self
.title
, traceback
=True, sender
=self
)
963 def find_unique_file_name(self
, url
, filename
, extension
):
964 current_try
= util
.sanitize_filename(filename
, self
.MAX_FILENAME_LENGTH
)+extension
968 if self
.filename
== current_try
and current_try
is not None:
969 # We already have this filename - good!
972 while self
.db
.episode_filename_exists(current_try
):
973 current_try
= '%s (%d)%s' % (filename
, next_try_id
, extension
)
978 def local_filename(self
, create
, force_update
=False, check_only
=False,
980 """Get (and possibly generate) the local saving filename
982 Pass create=True if you want this function to generate a
983 new filename if none exists. You only want to do this when
984 planning to create/download the file after calling this function.
986 Normally, you should pass create=False. This will only
987 create a filename when the file already exists from a previous
988 version of gPodder (where we used md5 filenames). If the file
989 does not exist (and the filename also does not exist), this
990 function will return None.
992 If you pass force_update=True to this function, it will try to
993 find a new (better) filename and move the current file if this
994 is the case. This is useful if (during the download) you get
995 more information about the file, e.g. the mimetype and you want
996 to include this information in the file name generation process.
998 If check_only=True is passed to this function, it will never try
999 to rename the file, even if would be a good idea. Use this if you
1000 only want to check if a file exists.
1002 If "template" is specified, it should be a filename that is to
1003 be used as a template for generating the "real" filename.
1005 The generated filename is stored in the database for future access.
1007 ext
= self
.extension(may_call_local_filename
=False).encode('utf-8', 'ignore')
1009 # For compatibility with already-downloaded episodes, we
1010 # have to know md5 filenames if they are downloaded already
1011 urldigest
= hashlib
.md5(self
.url
).hexdigest()
1013 if not create
and self
.filename
is None:
1014 urldigest_filename
= os
.path
.join(self
.channel
.save_dir
, urldigest
+ext
)
1015 if os
.path
.exists(urldigest_filename
):
1016 # The file exists, so set it up in our database
1017 log('Recovering pre-0.15.0 file: %s', urldigest_filename
, sender
=self
)
1018 self
.filename
= urldigest
+ext
1019 self
.auto_filename
= 1
1021 return urldigest_filename
1024 # We only want to check if the file exists, so don't try to
1025 # rename the file, even if it would be reasonable. See also:
1026 # http://bugs.gpodder.org/attachment.cgi?id=236
1028 if self
.filename
is None:
1031 return os
.path
.join(self
.channel
.save_dir
, self
.filename
)
1033 if self
.filename
is None or force_update
or (self
.auto_filename
and self
.filename
== urldigest
+ext
):
1034 # Try to find a new filename for the current file
1035 if template
is not None:
1036 # If template is specified, trust the template's extension
1037 episode_filename
, ext
= os
.path
.splitext(template
)
1039 episode_filename
, extension_UNUSED
= util
.filename_from_url(self
.url
)
1040 fn_template
= util
.sanitize_filename(episode_filename
, self
.MAX_FILENAME_LENGTH
)
1042 if 'redirect' in fn_template
and template
is None:
1043 # This looks like a redirection URL - force URL resolving!
1044 log('Looks like a redirection to me: %s', self
.url
, sender
=self
)
1045 url
= util
.get_real_url(self
.channel
.authenticate_url(self
.url
))
1046 log('Redirection resolved to: %s', url
, sender
=self
)
1047 (episode_filename
, extension_UNUSED
) = util
.filename_from_url(url
)
1048 fn_template
= util
.sanitize_filename(episode_filename
, self
.MAX_FILENAME_LENGTH
)
1050 # Use the video title for YouTube downloads
1051 for yt_url
in ('http://youtube.com/', 'http://www.youtube.com/'):
1052 if self
.url
.startswith(yt_url
):
1053 fn_template
= util
.sanitize_filename(os
.path
.basename(self
.title
), self
.MAX_FILENAME_LENGTH
)
1055 # If the basename is empty, use the md5 hexdigest of the URL
1056 if len(fn_template
) == 0 or fn_template
.startswith('redirect.'):
1057 log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self
.channel
.url
, self
.url
, sender
=self
)
1058 fn_template
= urldigest
1060 # Find a unique filename for this episode
1061 wanted_filename
= self
.find_unique_file_name(self
.url
, fn_template
, ext
)
1063 # We populate the filename field the first time - does the old file still exist?
1064 if self
.filename
is None and os
.path
.exists(os
.path
.join(self
.channel
.save_dir
, urldigest
+ext
)):
1065 log('Found pre-0.15.0 downloaded file: %s', urldigest
, sender
=self
)
1066 self
.filename
= urldigest
+ext
1068 # The old file exists, but we have decided to want a different filename
1069 if self
.filename
is not None and wanted_filename
!= self
.filename
:
1070 # there might be an old download folder crawling around - move it!
1071 new_file_name
= os
.path
.join(self
.channel
.save_dir
, wanted_filename
)
1072 old_file_name
= os
.path
.join(self
.channel
.save_dir
, self
.filename
)
1073 if os
.path
.exists(old_file_name
) and not os
.path
.exists(new_file_name
):
1074 log('Renaming %s => %s', old_file_name
, new_file_name
, sender
=self
)
1075 os
.rename(old_file_name
, new_file_name
)
1076 elif force_update
and not os
.path
.exists(old_file_name
):
1077 # When we call force_update, the file might not yet exist when we
1078 # call it from the downloading code before saving the file
1079 log('Choosing new filename: %s', new_file_name
, sender
=self
)
1081 log('Warning: %s exists or %s does not.', new_file_name
, old_file_name
, sender
=self
)
1082 log('Updating filename of %s to "%s".', self
.url
, wanted_filename
, sender
=self
)
1083 elif self
.filename
is None:
1084 log('Setting filename to "%s".', wanted_filename
, sender
=self
)
1086 log('Should update filename. Stays the same (%s). Good!', \
1087 wanted_filename
, sender
=self
)
1088 self
.filename
= wanted_filename
1092 return os
.path
.join(self
.channel
.save_dir
, self
.filename
)
def set_mimetype(self, mimetype, commit=False):
    """Sets the mimetype for this episode"""
    # NOTE(review): 'commit' is accepted but not used in the visible code —
    # presumably it should flush the change via self.db.commit(); those lines
    # appear to have been lost from this chunk. Confirm against upstream.
    self.mimetype = mimetype
def extension(self, may_call_local_filename=True):
    """Best-guess filename extension (including the dot) for this episode.

    Derived from the URL, then from the local filename (unless
    may_call_local_filename is False, to avoid recursion from
    local_filename itself), and finally from the mimetype.
    """
    filename, ext = util.filename_from_url(self.url)
    if may_call_local_filename:
        filename = self.local_filename(create=False)
        if filename is not None:
            filename, ext = os.path.splitext(filename)
    # if we can't detect the extension from the url fallback on the mimetype
    if ext == '' or util.file_type_by_extension(ext) is None:
        ext = util.extension_from_mimetype(self.mimetype)
    # NOTE(review): the final return was lost from this chunk; without it the
    # method always returned None. Restored to return the computed value.
    return ext
def check_is_new(self, downloading=lambda e: False):
    """Return True if this episode is to be considered new.

    'downloading' is a callback that receives the episode and returns
    True while the episode is currently being downloaded.
    """
    if self.state != gpodder.STATE_NORMAL:
        return False
    if self.is_played:
        return False
    return not downloading(self)
def mark_new(self):
    """Reset the episode to the 'new' (normal, unplayed) state and persist it."""
    # NOTE(review): the 'def' header was lost from this chunk; the method name
    # is reconstructed from upstream gPodder — confirm before relying on it.
    self.state = gpodder.STATE_NORMAL
    self.is_played = False
    self.db.update_episode_state(self)
def mark_old(self):
    """Mark the episode as played ('old') and persist the change."""
    # NOTE(review): the 'def' header was lost from this chunk; the method name
    # is reconstructed from upstream gPodder — confirm before relying on it.
    self.is_played = True
    self.db.update_episode_state(self)
def file_exists(self):
    """Return True if this episode's local file is present on disk."""
    filename = self.local_filename(create=False, check_only=True)
    # NOTE(review): the None-branch return was lost from this chunk; restored
    # as False (no known filename means no file) — confirm against upstream.
    if filename is None:
        return False
    return os.path.exists(filename)
def was_downloaded(self, and_exists=False):
    """True if the episode is in the DOWNLOADED state.

    With and_exists=True, additionally require the local file to exist.
    """
    # NOTE(review): all three return statements were lost from this chunk;
    # restored (False on failed checks, True otherwise) — confirm upstream.
    if self.state != gpodder.STATE_DOWNLOADED:
        return False
    if and_exists and not self.file_exists():
        return False
    return True
def sync_filename(self, use_custom=False, custom_format=None):
    """Filename to use when syncing this episode to a device.

    When use_custom is set, custom_format is expanded via
    util.object_string_formatter with this episode and its podcast;
    otherwise the episode title is used.
    """
    # NOTE(review): the use_custom guard and the title fallback were lost
    # from this chunk; reconstructed from the signature and upstream gPodder
    # — confirm.
    if use_custom:
        return util.object_string_formatter(custom_format,
                episode=self, podcast=self.channel)
    else:
        return self.title
def file_type(self):
    """Return the media type of this episode, based on URL/extension."""
    # Assume all YouTube links are video files
    if youtube.is_video_link(self.url):
        # NOTE(review): this branch's return was lost from this chunk;
        # restored as 'video' per the comment above — confirm upstream.
        return 'video'
    return util.file_type_by_extension(self.extension())
def basename(self):
    """Return the final path component of the URL, without its extension."""
    stem, _unused_ext = os.path.splitext(os.path.basename(self.url))
    return stem
def published(self):
    """
    Returns published date as YYYYMMDD (or 00000000 if not available)
    """
    # NOTE(review): the try/except and the fallback return were lost from
    # this chunk; restored per the docstring's "(or 00000000)" contract.
    try:
        return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
    except:
        log('Cannot format pubDate for "%s".', self.title, sender=self)
        return '00000000'
def pubtime(self):
    """
    Returns published time as HHMM (or 0000 if not available)
    """
    # NOTE(review): the 'def' header and the fallback return were lost from
    # this chunk; name and '0000' fallback reconstructed from upstream
    # gPodder — confirm.
    try:
        return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
    except:
        log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
        return '0000'
def playlist_title(self):
    """Return a title for this episode in a playlist

    The title will be composed of the podcast name, the
    episode name and the publication date. The return
    value is the canonical representation of this episode
    in playlists (for example, M3U playlists).
    """
    # The format string expects three values; the episode-title argument was
    # missing from this chunk (only podcast title and pubdate were passed)
    # and has been restored.
    return '%s - %s (%s)' % (self.channel.title,
            self.title,
            self.cute_pubdate())
def cute_pubdate(self):
    """Human-readable publication date, or "(unknown)" when unavailable."""
    result = util.format_date(self.pubDate)
    # NOTE(review): the None-guard and the success-path return were lost from
    # this chunk; as visible, '(unknown)' was returned unconditionally.
    # Restored per upstream gPodder — confirm.
    if result is None:
        return '(%s)' % _('unknown')
    return result
# Read-only property alias: episode.pubdate_prop yields cute_pubdate().
pubdate_prop = property(fget=cute_pubdate)
def calculate_filesize(self):
    """Update self.length with the on-disk size of the local file."""
    filename = self.local_filename(create=False)
    if filename is None:
        log('calculate_filesized called, but filename is None!', sender=self)
    # NOTE(review): the try/except lines were lost from this chunk; restored
    # so a missing/unreadable file logs instead of raising — confirm.
    try:
        self.length = os.path.getsize(filename)
    except:
        log('Could not get filesize for %s.', self.url)
def get_play_info_string(self):
    """Playback progress summary: finished, position/duration, or duration."""
    position = self.current_position
    if position > 0:
        if self.total_time <= position:
            return '%s (%s)' % (_('Finished'), self.get_duration_string(),)
        return '%s / %s' % (self.get_position_string(),
                self.get_duration_string())
    return self.get_duration_string()
def get_position_string(self):
    """Current playback position, formatted via util.format_time."""
    return util.format_time(self.current_position)
def get_duration_string(self):
    """Total duration, formatted via util.format_time."""
    return util.format_time(self.total_time)
def get_filesize_string(self):
    """File size (self.length), formatted via util.format_filesize."""
    return util.format_filesize(self.length)
# Read-only property exposing the formatted file size string.
filesize_prop = property(fget=get_filesize_string)
def get_played_string(self):
    """Return the localized 'Unplayed' label, or '' for played episodes."""
    if not self.is_played:
        return _('Unplayed')
    # NOTE(review): the played-branch return was lost from this chunk;
    # restored as '' per upstream gPodder — confirm.
    return ''
# Read-only property exposing the played-state label.
played_prop = property(fget=get_played_string)
def is_duplicate(self, episode):
    """Return True if 'episode' shares this episode's title and pubDate."""
    if self.title == episode.title and self.pubDate == episode.pubDate:
        log('Possible duplicate detected: %s', self.title)
        # NOTE(review): the return statements were lost from this chunk;
        # restored (True on match, False otherwise) — confirm upstream.
        return True
    return False
def duplicate_id(self):
    """Hash of (title, pubDate), used to group duplicate episodes."""
    key = (self.title, self.pubDate)
    return hash(key)
def update_from(self, episode):
    """Copy the feed-provided fields from another episode onto this one."""
    fields = ('title', 'url', 'description', 'link', 'pubDate', 'guid')
    for field in fields:
        setattr(self, field, getattr(episode, field))