1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # gpodder.model - Core model classes for gPodder (2009-08-13)
23 # Based on libpodcasts.py (thp, 2005-10-29)
27 from gpodder
import util
28 from gpodder
import feedcore
29 from gpodder
import youtube
30 from gpodder
import corestats
32 from gpodder
.liblogger
import log
45 import xml
.sax
.saxutils
class CustomFeed(feedcore.ExceptionWithData):
    """Control-flow signal carrying a custom handler's feed object.

    Raised by gPodderFetcher.fetch_channel when a registered custom
    handler takes over a URL; the handler's feed object travels in the
    "data" attribute inherited from feedcore.ExceptionWithData and is
    consumed by PodcastChannel.update.
    """
52 class gPodderFetcher(feedcore
.Fetcher
):
54 This class extends the feedcore Fetcher with the gPodder User-Agent and the
55 Proxy handler based on the current settings in gPodder and provides a
56 convenience method (fetch_channel) for use by PodcastChannel objects.
61 feedcore
.Fetcher
.__init
__(self
, gpodder
.user_agent
)
63 def fetch_channel(self
, channel
):
65 modified
= feedparser
._parse
_date
(channel
.last_modified
)
66 # If we have a username or password, rebuild the url with them included
67 # Note: using a HTTPBasicAuthHandler would be pain because we need to
68 # know the realm. It can be done, but I think this method works, too
69 url
= channel
.authenticate_url(channel
.url
)
70 for handler
in self
.custom_handlers
:
71 custom_feed
= handler
.handle_url(url
)
72 if custom_feed
is not None:
73 raise CustomFeed(custom_feed
)
74 self
.fetch(url
, etag
, modified
)
76 def _resolve_url(self
, url
):
77 return youtube
.get_real_channel_url(url
)
    def register(cls, handler):
        """Install a custom URL handler on the class-wide handler list.

        Handlers must provide handle_url(url) returning a custom feed
        object, or None to decline the URL (see fetch_channel).
        """
        cls.custom_handlers.append(handler)
83 # def _get_handlers(self):
84 # # Add a ProxyHandler for fetching data via a proxy server
85 # proxies = {'http': 'http://proxy.example.org:8080'}
86 # return[urllib2.ProxyHandler(proxies))]
# The "register" method is exposed here for external usage, so other
# modules can install custom URL handlers without importing the class.
register_custom_handler = gPodderFetcher.register
91 class PodcastModelObject(object):
93 A generic base class for our podcast model providing common helper
94 and utility functions.
98 def create_from_dict(cls
, d
, *args
):
100 Create a new object, passing "args" to the constructor
101 and then updating the object with the values from "d".
104 o
.update_from_dict(d
)
107 def update_from_dict(self
, d
):
109 Updates the attributes of this object with values from the
110 dictionary "d" by using the keys found in "d".
114 setattr(self
, k
, d
[k
])
117 class PodcastChannel(PodcastModelObject
):
118 """holds data for a complete channel"""
119 MAX_FOLDERNAME_LENGTH
= 150
120 SECONDS_PER_WEEK
= 7*24*60*60
122 feed_fetcher
= gPodderFetcher()
125 def build_factory(cls
, download_dir
):
126 def factory(dict, db
):
127 return cls
.create_from_dict(dict, db
, download_dir
)
131 def load_from_db(cls
, db
, download_dir
):
132 return db
.load_channels(factory
=cls
.build_factory(download_dir
))
135 def load(cls
, db
, url
, create
=True, authentication_tokens
=None,\
136 max_episodes
=0, download_dir
=None, allow_empty_feeds
=False):
137 if isinstance(url
, unicode):
138 url
= url
.encode('utf-8')
140 tmp
= db
.load_channels(factory
=cls
.build_factory(download_dir
), url
=url
)
144 tmp
= PodcastChannel(db
, download_dir
)
146 if authentication_tokens
is not None:
147 tmp
.username
= authentication_tokens
[0]
148 tmp
.password
= authentication_tokens
[1]
150 tmp
.update(max_episodes
)
152 db
.force_last_new(tmp
)
153 # Subscribing to empty feeds should yield an error (except if
154 # the user specifically allows empty feeds in the config UI)
155 if sum(tmp
.get_statistics()) == 0 and not allow_empty_feeds
:
157 raise Exception(_('No downloadable episodes in feed'))
    def episode_factory(self, d, db__parameter_is_unused=None):
        """
        This function takes a dictionary containing key-value pairs for
        episodes and returns a new PodcastEpisode object that is connected
        to this PodcastChannel object.

        The second parameter exists only for signature compatibility
        with callers that pass a db handle; it is ignored here.

        Returns: A new PodcastEpisode object
        """
        return PodcastEpisode.create_from_dict(d, self)
170 def _consume_custom_feed(self
, custom_feed
, max_episodes
=0):
171 self
.title
= custom_feed
.get_title()
172 self
.link
= custom_feed
.get_link()
173 self
.description
= custom_feed
.get_description()
174 self
.image
= custom_feed
.get_image()
175 self
.pubDate
= time
.time()
178 guids
= [episode
.guid
for episode
in self
.get_all_episodes()]
181 self
.db
.purge(max_episodes
, self
.id)
183 def _consume_updated_feed(self
, feed
, max_episodes
=0):
184 self
.parse_error
= feed
.get('bozo_exception', None)
186 self
.title
= feed
.feed
.get('title', self
.url
)
187 self
.link
= feed
.feed
.get('link', self
.link
)
188 self
.description
= feed
.feed
.get('subtitle', self
.description
)
189 # Start YouTube-specific title FIX
190 YOUTUBE_PREFIX
= 'Uploads by '
191 if self
.title
.startswith(YOUTUBE_PREFIX
):
192 self
.title
= self
.title
[len(YOUTUBE_PREFIX
):] + ' on YouTube'
193 # End YouTube-specific title FIX
196 self
.pubDate
= rfc822
.mktime_tz(feed
.feed
.get('updated_parsed', None+(0,)))
198 self
.pubDate
= time
.time()
200 if hasattr(feed
.feed
, 'image'):
201 for attribute
in ('href', 'url'):
202 new_value
= getattr(feed
.feed
.image
, attribute
, None)
203 if new_value
is not None:
204 log('Found cover art in %s: %s', attribute
, new_value
)
205 self
.image
= new_value
207 if hasattr(feed
.feed
, 'icon'):
208 self
.image
= feed
.feed
.icon
212 # Load all episodes to update them properly.
213 existing
= self
.get_all_episodes()
215 # We can limit the maximum number of entries that gPodder will parse
216 if max_episodes
> 0 and len(feed
.entries
) > max_episodes
:
217 entries
= feed
.entries
[:max_episodes
]
219 entries
= feed
.entries
221 # Title + PubDate hashes for existing episodes
222 existing_dupes
= dict((e
.duplicate_id(), e
) for e
in existing
)
224 # GUID-based existing episode list
225 existing_guids
= dict((e
.guid
, e
) for e
in existing
)
227 # Get most recent pubDate of all episodes
228 last_pubdate
= self
.db
.get_last_pubdate(self
) or 0
230 # Search all entries for new episodes
231 for entry
in entries
:
233 episode
= PodcastEpisode
.from_feedparser_entry(entry
, self
)
234 if episode
is not None and not episode
.title
:
235 episode
.title
, ext
= os
.path
.splitext(os
.path
.basename(episode
.url
))
237 log('Cannot instantiate episode: %s. Skipping.', e
, sender
=self
, traceback
=True)
243 # Detect (and update) existing episode based on GUIDs
244 existing_episode
= existing_guids
.get(episode
.guid
, None)
246 existing_episode
.update_from(episode
)
247 existing_episode
.save()
250 # Detect (and update) existing episode based on duplicate ID
251 existing_episode
= existing_dupes
.get(episode
.duplicate_id(), None)
253 if existing_episode
.is_duplicate(episode
):
254 existing_episode
.update_from(episode
)
255 existing_episode
.save()
258 # Workaround for bug 340: If the episode has been
259 # published earlier than one week before the most
260 # recent existing episode, do not mark it as new.
261 if episode
.pubDate
< last_pubdate
- self
.SECONDS_PER_WEEK
:
262 log('Episode with old date: %s', episode
.title
, sender
=self
)
263 episode
.is_played
= True
267 # Remove "unreachable" episodes - episodes that have not been
268 # downloaded and that the feed does not list as downloadable anymore
269 if self
.id is not None:
270 seen_guids
= set(e
.guid
for e
in feed
.entries
if hasattr(e
, 'guid'))
271 episodes_to_purge
= (e
for e
in existing
if \
272 e
.state
!= gpodder
.STATE_DOWNLOADED
and \
273 e
.guid
not in seen_guids
and e
.guid
is not None)
274 for episode
in episodes_to_purge
:
275 log('Episode removed from feed: %s (%s)', episode
.title
, \
276 episode
.guid
, sender
=self
)
277 self
.db
.delete_episode_by_guid(episode
.guid
, self
.id)
279 # This *might* cause episodes to be skipped if there were more than
280 # max_episodes_per_feed items added to the feed between updates.
281 # The benefit is that it prevents old episodes from apearing as new
282 # in certain situations (see bug #340).
283 self
.db
.purge(max_episodes
, self
.id)
    def update_channel_lock(self):
        """Persist this channel's lock flag to the database."""
        self.db.update_channel_lock(self)
    def _update_etag_modified(self, feed):
        """Record fetch-time metadata after a feed download.

        Stores the current time as the last-update timestamp, refreshes
        the publish-behaviour estimate, and caches the HTTP "ETag" and
        "Last-Modified" response headers so the next fetch can make a
        conditional request (see gPodderFetcher.fetch_channel).
        """
        self.updated_timestamp = time.time()
        self.calculate_publish_behaviour()
        self.etag = feed.headers.get('etag', self.etag)
        self.last_modified = feed.headers.get('last-modified', self.last_modified)
294 def query_automatic_update(self
):
295 """Query if this channel should be updated automatically
297 Returns True if the update should happen in automatic
298 mode or False if this channel should be skipped (timeout
299 not yet reached or release not expected right now).
301 updated
= self
.updated_timestamp
302 expected
= self
.release_expected
305 one_day_ago
= now
- 60*60*24
306 lastcheck
= now
- 60*10
308 return updated
< one_day_ago
or \
309 (expected
< now
and updated
< lastcheck
)
311 def update(self
, max_episodes
=0):
313 self
.feed_fetcher
.fetch_channel(self
)
314 except CustomFeed
, updated
:
315 custom_feed
= updated
.data
316 self
._consume
_custom
_feed
(custom_feed
, max_episodes
)
318 except feedcore
.UpdatedFeed
, updated
:
320 self
._consume
_updated
_feed
(feed
, max_episodes
)
321 self
._update
_etag
_modified
(feed
)
323 except feedcore
.NewLocation
, updated
:
326 self
._consume
_updated
_feed
(feed
, max_episodes
)
327 self
._update
_etag
_modified
(feed
)
329 except feedcore
.NotModified
, updated
:
331 self
._update
_etag
_modified
(feed
)
334 # "Not really" errors
335 #feedcore.AuthenticationRequired
339 #feedcore.InternalServerError
342 #feedcore.Unsubscribe
344 #feedcore.InvalidFeed
345 #feedcore.UnknownStatusCode
351 self
.db
.delete_channel(self
)
354 self
.db
.save_channel(self
)
356 def get_statistics(self
):
358 return (0, 0, 0, 0, 0)
360 return self
.db
.get_channel_count(int(self
.id))
    def authenticate_url(self, url):
        """Return "url" with this channel's credentials embedded.

        Delegates to util.url_add_authentication with the channel's
        username and password.
        """
        return util.url_add_authentication(url, self.username, self.password)
365 def __init__(self
, db
, download_dir
):
367 self
.download_dir
= download_dir
372 self
.description
= ''
375 self
.parse_error
= None
376 self
.foldername
= None
377 self
.auto_foldername
= 1 # automatically generated foldername
379 # should this channel be synced to devices? (ex: iPod)
380 self
.sync_to_devices
= True
381 # to which playlist should be synced
382 self
.device_playlist_name
= 'gPodder'
383 # if set, this overrides the channel-provided title
384 self
.override_title
= ''
388 self
.last_modified
= None
391 self
.save_dir_size
= 0
392 self
.__save
_dir
_size
_set
= False
394 self
.channel_is_locked
= False
396 self
.release_expected
= time
.time()
397 self
.release_deviation
= 0
398 self
.updated_timestamp
= 0
400 def calculate_publish_behaviour(self
):
401 episodes
= self
.db
.load_episodes(self
, factory
=self
.episode_factory
, limit
=30)
402 if len(episodes
) < 3:
406 latest
= max(e
.pubDate
for e
in episodes
)
407 for index
in range(len(episodes
)-1):
408 if episodes
[index
].pubDate
!= 0 and episodes
[index
+1].pubDate
!= 0:
409 deltas
.append(episodes
[index
].pubDate
- episodes
[index
+1].pubDate
)
412 stats
= corestats
.Stats(deltas
)
413 self
.release_expected
= min([latest
+stats
.stdev(), latest
+(stats
.min()+stats
.avg())*.5])
414 self
.release_deviation
= stats
.stdev()
416 self
.release_expected
= latest
417 self
.release_deviation
= 0
    def request_save_dir_size(self):
        # Lazily calculate the download-folder size only on the first
        # request; later callers get the cached value unless they call
        # update_save_dir_size() directly to force a refresh.
        if not self.__save_dir_size_set:
            self.update_save_dir_size()
        self.__save_dir_size_set = True
    def update_save_dir_size(self):
        """Recalculate and cache the disk usage of the download folder."""
        self.save_dir_size = util.calculate_size(self.save_dir)
427 def get_title( self
):
428 if self
.override_title
:
429 return self
.override_title
430 elif not self
.__title
.strip():
    def set_title( self, value ):
        """Setter for the "title" property; strips surrounding whitespace."""
        self.__title = value.strip()
438 title
= property(fget
=get_title
,
441 def set_custom_title( self
, custom_title
):
442 custom_title
= custom_title
.strip()
444 # if the custom title is the same as we have
445 if custom_title
== self
.override_title
:
448 # if custom title is the same as channel title and we didn't have a custom title
449 if custom_title
== self
.__title
and self
.override_title
== '':
452 # make sure self.foldername is initialized
455 # rename folder if custom_title looks sane
456 new_folder_name
= self
.find_unique_folder_name(custom_title
)
457 if len(new_folder_name
) > 0 and new_folder_name
!= self
.foldername
:
458 log('Changing foldername based on custom title: %s', custom_title
, sender
=self
)
459 new_folder
= os
.path
.join(self
.download_dir
, new_folder_name
)
460 old_folder
= os
.path
.join(self
.download_dir
, self
.foldername
)
461 if os
.path
.exists(old_folder
):
462 if not os
.path
.exists(new_folder
):
463 # Old folder exists, new folder does not -> simply rename
464 log('Renaming %s => %s', old_folder
, new_folder
, sender
=self
)
465 os
.rename(old_folder
, new_folder
)
467 # Both folders exist -> move files and delete old folder
468 log('Moving files from %s to %s', old_folder
, new_folder
, sender
=self
)
469 for file in glob
.glob(os
.path
.join(old_folder
, '*')):
470 shutil
.move(file, new_folder
)
471 log('Removing %s', old_folder
, sender
=self
)
472 shutil
.rmtree(old_folder
, ignore_errors
=True)
473 self
.foldername
= new_folder_name
476 if custom_title
!= self
.__title
:
477 self
.override_title
= custom_title
479 self
.override_title
= ''
    def get_downloaded_episodes(self):
        """Return all episodes of this channel in the DOWNLOADED state."""
        return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)
484 def get_new_episodes(self
, downloading
=lambda e
: False):
486 Get a list of new episodes. You can optionally specify
487 "downloading" as a callback that takes an episode as
488 a parameter and returns True if the episode is currently
489 being downloaded or False if not.
491 By default, "downloading" is implemented so that it
492 reports all episodes as not downloading.
494 return [episode
for episode
in self
.db
.load_episodes(self
, \
495 factory
=self
.episode_factory
) if \
496 episode
.check_is_new(downloading
=downloading
)]
    def get_playlist_filename(self):
        """Return the path of this channel's M3U playlist file."""
        # If the save_dir doesn't end with a slash (which it really should
        # not, if the implementation is correct), we can just append .m3u :)
        assert self.save_dir[-1] != '/'
        return self.save_dir+'.m3u'
504 def update_m3u_playlist(self
):
505 m3u_filename
= self
.get_playlist_filename()
507 downloaded_episodes
= self
.get_downloaded_episodes()
508 if not downloaded_episodes
:
509 log('No episodes - removing %s', m3u_filename
, sender
=self
)
510 util
.delete_file(m3u_filename
)
513 log('Writing playlist to %s', m3u_filename
, sender
=self
)
514 f
= open(m3u_filename
, 'w')
517 for episode
in PodcastEpisode
.sort_by_pubdate(downloaded_episodes
):
518 if episode
.was_downloaded(and_exists
=True):
519 filename
= episode
.local_filename(create
=False)
520 assert filename
is not None
522 if os
.path
.dirname(filename
).startswith(os
.path
.dirname(m3u_filename
)):
523 filename
= filename
[len(os
.path
.dirname(m3u_filename
)+os
.sep
):]
524 f
.write('#EXTINF:0,'+self
.title
+' - '+episode
.title
+' ('+episode
.cute_pubdate()+')\n')
525 f
.write(filename
+'\n')
    def get_all_episodes(self):
        """Return every episode of this channel from the database."""
        return self.db.load_episodes(self, factory=self.episode_factory)
532 def find_unique_folder_name(self
, foldername
):
533 # Remove trailing dots to avoid errors on Windows (bug 600)
534 foldername
= foldername
.strip().rstrip('.')
536 current_try
= util
.sanitize_filename(foldername
, \
537 self
.MAX_FOLDERNAME_LENGTH
)
541 if not os
.path
.exists(os
.path
.join(self
.download_dir
, current_try
)):
542 self
.db
.remove_foldername_if_deleted_channel(current_try
)
544 if self
.db
.channel_foldername_exists(current_try
):
545 current_try
= '%s (%d)' % (foldername
, next_try_id
)
550 def get_save_dir(self
):
551 urldigest
= hashlib
.md5(self
.url
).hexdigest()
552 sanitizedurl
= util
.sanitize_filename(self
.url
, self
.MAX_FOLDERNAME_LENGTH
)
553 if self
.foldername
is None or (self
.auto_foldername
and (self
.foldername
== urldigest
or self
.foldername
.startswith(sanitizedurl
))):
554 # we must change the folder name, because it has not been set manually
555 fn_template
= util
.sanitize_filename(self
.title
, self
.MAX_FOLDERNAME_LENGTH
)
557 # if this is an empty string, try the basename
558 if len(fn_template
) == 0:
559 log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self
.url
, sender
=self
)
560 fn_template
= util
.sanitize_filename(os
.path
.basename(self
.url
), self
.MAX_FOLDERNAME_LENGTH
)
562 # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
563 if len(fn_template
) == 0:
564 log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self
.url
, sender
=self
)
565 fn_template
= urldigest
# no need for sanitize_filename here
567 # Find a unique folder name for this podcast
568 wanted_foldername
= self
.find_unique_folder_name(fn_template
)
570 # if the foldername has not been set, check if the (old) md5 filename exists
571 if self
.foldername
is None and os
.path
.exists(os
.path
.join(self
.download_dir
, urldigest
)):
572 log('Found pre-0.15.0 download folder for %s: %s', self
.title
, urldigest
, sender
=self
)
573 self
.foldername
= urldigest
575 # we have a valid, new folder name in "current_try" -> use that!
576 if self
.foldername
is not None and wanted_foldername
!= self
.foldername
:
577 # there might be an old download folder crawling around - move it!
578 new_folder_name
= os
.path
.join(self
.download_dir
, wanted_foldername
)
579 old_folder_name
= os
.path
.join(self
.download_dir
, self
.foldername
)
580 if os
.path
.exists(old_folder_name
):
581 if not os
.path
.exists(new_folder_name
):
582 # Old folder exists, new folder does not -> simply rename
583 log('Renaming %s => %s', old_folder_name
, new_folder_name
, sender
=self
)
584 os
.rename(old_folder_name
, new_folder_name
)
586 # Both folders exist -> move files and delete old folder
587 log('Moving files from %s to %s', old_folder_name
, new_folder_name
, sender
=self
)
588 for file in glob
.glob(os
.path
.join(old_folder_name
, '*')):
589 shutil
.move(file, new_folder_name
)
590 log('Removing %s', old_folder_name
, sender
=self
)
591 shutil
.rmtree(old_folder_name
, ignore_errors
=True)
592 log('Updating foldername of %s to "%s".', self
.url
, wanted_foldername
, sender
=self
)
593 self
.foldername
= wanted_foldername
596 save_dir
= os
.path
.join(self
.download_dir
, self
.foldername
)
598 # Create save_dir if it does not yet exist
599 if not util
.make_directory( save_dir
):
600 log( 'Could not create save_dir: %s', save_dir
, sender
= self
)
604 save_dir
= property(fget
=get_save_dir
)
    def remove_downloaded( self):
        """Delete this channel's download folder (errors are ignored)."""
        shutil.rmtree( self.save_dir, True)
610 def cover_file(self
):
611 new_name
= os
.path
.join(self
.save_dir
, 'folder.jpg')
612 if not os
.path
.exists(new_name
):
613 old_names
= ('cover', '.cover')
614 for old_name
in old_names
:
615 filename
= os
.path
.join(self
.save_dir
, old_name
)
616 if os
.path
.exists(filename
):
617 shutil
.move(filename
, new_name
)
    def delete_episode(self, episode):
        """Remove an episode's downloaded file and mark it deleted.

        Looks up the local file without creating or renaming anything;
        deletes it if present, then sets the episode state to DELETED
        either way.
        """
        filename = episode.local_filename(create=False, check_only=True)
        if filename is not None:
            util.delete_file(filename)

        episode.set_state(gpodder.STATE_DELETED)
630 class PodcastEpisode(PodcastModelObject
):
631 """holds data for one object in a channel"""
632 MAX_FILENAME_LENGTH
= 200
    def _get_played(self):
        # Getter for the "played" DB column alias
        return self.is_played

    def _set_played(self, played):
        # Setter for the "played" DB column alias
        self.is_played = played

    # Alias "is_played" to "played" for DB column mapping
    played = property(fget=_get_played, fset=_set_played)
    def _get_locked(self):
        # Getter for the "locked" DB column alias
        return self.is_locked

    def _set_locked(self, locked):
        # Setter for the "locked" DB column alias
        self.is_locked = locked

    # Alias "is_locked" to "locked" for DB column mapping
    locked = property(fget=_get_locked, fset=_set_locked)
    def _get_channel_id(self):
        # The channel id always comes from the parent channel object
        return self.channel.id

    def _set_channel_id(self, channel_id):
        # The id cannot actually be changed here; the setter only checks
        # that the value written back matches the parent channel's id.
        assert self.channel.id == channel_id

    # Accessor for the "channel_id" DB column
    channel_id = property(fget=_get_channel_id, fset=_set_channel_id)
662 def sort_by_pubdate(episodes
, reverse
=False):
663 """Sort a list of PodcastEpisode objects chronologically
665 Returns a iterable, sorted sequence of the episodes
667 key_pubdate
= lambda e
: e
.pubDate
668 return sorted(episodes
, key
=key_pubdate
, reverse
=reverse
)
    def reload_from_db(self):
        """
        Re-reads all episode details for this object from the
        database and updates this object accordingly. Can be
        used to refresh existing objects when the database has
        been updated (e.g. the filename has been set after a
        download where it was not set before the download)
        """
        d = self.db.load_episode(self.id)
        self.update_from_dict(d or {})
    def has_website_link(self):
        """True if the episode has a link that differs from its media URL."""
        return bool(self.link) and (self.link != self.url)
686 def from_feedparser_entry(entry
, channel
):
687 episode
= PodcastEpisode(channel
)
689 episode
.title
= entry
.get('title', '')
690 episode
.link
= entry
.get('link', '')
691 episode
.description
= entry
.get('summary', '')
693 # Fallback to subtitle if summary is not available0
694 if not episode
.description
:
695 episode
.description
= entry
.get('subtitle', '')
697 episode
.guid
= entry
.get('id', '')
698 if entry
.get('updated_parsed', None):
699 episode
.pubDate
= rfc822
.mktime_tz(entry
.updated_parsed
+(0,))
702 for e
in entry
.get('enclosures', ()):
703 episode
.mimetype
= e
.get('type', 'application/octet-stream')
704 if '/' not in episode
.mimetype
:
707 episode
.url
= util
.normalize_feed_url(e
.get('href', ''))
712 episode
.length
= int(e
.length
) or -1
719 for m
in entry
.get('media_content', ()):
720 episode
.mimetype
= m
.get('type', 'application/octet-stream')
721 if '/' not in episode
.mimetype
:
724 episode
.url
= util
.normalize_feed_url(m
.get('url', ''))
729 episode
.length
= int(m
.fileSize
) or -1
735 # Brute-force detection of any links
736 for l
in entry
.get('links', ()):
737 episode
.url
= util
.normalize_feed_url(l
.get('href', ''))
741 if youtube
.is_video_link(episode
.url
):
744 # Check if we can resolve this link to a audio/video file
745 filename
, extension
= util
.filename_from_url(episode
.url
)
746 file_type
= util
.file_type_by_extension(extension
)
747 if file_type
is None and hasattr(l
, 'type'):
748 extension
= util
.extension_from_mimetype(l
.type)
749 file_type
= util
.file_type_by_extension(extension
)
751 # The link points to a audio or video file - use it!
752 if file_type
is not None:
755 # Scan MP3 links in description text
756 mp3s
= re
.compile(r
'http://[^"]*\.mp3')
757 for content
in entry
.get('content', ()):
759 for match
in mp3s
.finditer(html
):
760 episode
.url
= match
.group(0)
765 def __init__(self
, channel
):
767 # Used by Storage for faster saving
772 self
.mimetype
= 'application/octet-stream'
774 self
.description
= ''
776 self
.channel
= channel
779 self
.auto_filename
= 1 # automatically generated filename
781 self
.state
= gpodder
.STATE_NORMAL
782 self
.is_played
= False
784 # Initialize the "is_locked" property
785 self
._is
_locked
= False
786 self
.is_locked
= channel
.channel_is_locked
790 self
.current_position
= 0
791 self
.current_position_updated
= time
.time()
    def get_is_locked(self):
        # Getter for the "is_locked" property
        return self._is_locked

    def set_is_locked(self, is_locked):
        # Coerce to bool so the stored value is always a proper boolean
        self._is_locked = bool(is_locked)

    # Expose the private flag as the "is_locked" attribute
    is_locked = property(fget=get_is_locked, fset=set_is_locked)
802 if self
.state
!= gpodder
.STATE_DOWNLOADED
and self
.file_exists():
803 self
.state
= gpodder
.STATE_DOWNLOADED
804 self
.db
.save_episode(self
)
    def on_downloaded(self, filename):
        """Called after the episode has been downloaded to "filename".

        Marks the episode as downloaded and unplayed, records the file
        size from disk and persists everything to the database.
        """
        self.state = gpodder.STATE_DOWNLOADED
        self.is_played = False
        self.length = os.path.getsize(filename)
        self.db.save_downloaded_episode(self)
813 def set_state(self
, state
):
815 self
.db
.update_episode_state(self
)
817 def mark(self
, state
=None, is_played
=None, is_locked
=None):
818 if state
is not None:
820 if is_played
is not None:
821 self
.is_played
= is_played
822 if is_locked
is not None:
823 self
.is_locked
= is_locked
824 self
.db
.update_episode_state(self
)
    def title_markup(self):
        """Markup string: escaped episode title, channel title in <small>."""
        return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
                xml.sax.saxutils.escape(self.channel.title))
832 def maemo_markup(self
):
834 length_str
= '%s; ' % self
.filesize_prop
837 return ('<b>%s</b>\n<small>%s'+_('released %s')+ \
838 '; '+_('from %s')+'</small>') % (\
839 xml
.sax
.saxutils
.escape(self
.title
), \
840 xml
.sax
.saxutils
.escape(length_str
), \
841 xml
.sax
.saxutils
.escape(self
.pubdate_prop
), \
842 xml
.sax
.saxutils
.escape(self
.channel
.title
))
845 def maemo_remove_markup(self
):
847 played_string
= _('played')
849 played_string
= _('unplayed')
850 downloaded_string
= self
.get_age_string()
851 if not downloaded_string
:
852 downloaded_string
= _('today')
853 return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
854 '; '+_('from %s')+'</small>') % (\
855 xml
.sax
.saxutils
.escape(self
.title
), \
856 xml
.sax
.saxutils
.escape(self
.filesize_prop
), \
857 xml
.sax
.saxutils
.escape(played_string
), \
858 xml
.sax
.saxutils
.escape(downloaded_string
), \
859 xml
.sax
.saxutils
.escape(self
.channel
.title
))
861 def age_in_days(self
):
862 return util
.file_age_in_days(self
.local_filename(create
=False, \
    def get_age_string(self):
        """Return the local file's age as a human-readable string."""
        return util.file_age_to_string(self.age_in_days())

    # Read-only property wrapper around get_age_string
    age_prop = property(fget=get_age_string)
870 def one_line_description( self
):
871 lines
= util
.remove_html_tags(self
.description
).strip().splitlines()
872 if not lines
or lines
[0] == '':
873 return _('No description available')
875 return ' '.join(lines
)
877 def delete_from_disk(self
):
879 self
.channel
.delete_episode(self
)
881 log('Cannot delete episode from disk: %s', self
.title
, traceback
=True, sender
=self
)
883 def find_unique_file_name(self
, url
, filename
, extension
):
884 current_try
= util
.sanitize_filename(filename
, self
.MAX_FILENAME_LENGTH
)+extension
888 if self
.filename
== current_try
and current_try
is not None:
889 # We already have this filename - good!
892 while self
.db
.episode_filename_exists(current_try
):
893 current_try
= '%s (%d)%s' % (filename
, next_try_id
, extension
)
898 def local_filename(self
, create
, force_update
=False, check_only
=False,
900 """Get (and possibly generate) the local saving filename
902 Pass create=True if you want this function to generate a
903 new filename if none exists. You only want to do this when
904 planning to create/download the file after calling this function.
906 Normally, you should pass create=False. This will only
907 create a filename when the file already exists from a previous
908 version of gPodder (where we used md5 filenames). If the file
909 does not exist (and the filename also does not exist), this
910 function will return None.
912 If you pass force_update=True to this function, it will try to
913 find a new (better) filename and move the current file if this
914 is the case. This is useful if (during the download) you get
915 more information about the file, e.g. the mimetype and you want
916 to include this information in the file name generation process.
918 If check_only=True is passed to this function, it will never try
919 to rename the file, even if would be a good idea. Use this if you
920 only want to check if a file exists.
922 If "template" is specified, it should be a filename that is to
923 be used as a template for generating the "real" filename.
925 The generated filename is stored in the database for future access.
927 ext
= self
.extension(may_call_local_filename
=False).encode('utf-8', 'ignore')
929 # For compatibility with already-downloaded episodes, we
930 # have to know md5 filenames if they are downloaded already
931 urldigest
= hashlib
.md5(self
.url
).hexdigest()
933 if not create
and self
.filename
is None:
934 urldigest_filename
= os
.path
.join(self
.channel
.save_dir
, urldigest
+ext
)
935 if os
.path
.exists(urldigest_filename
):
936 # The file exists, so set it up in our database
937 log('Recovering pre-0.15.0 file: %s', urldigest_filename
, sender
=self
)
938 self
.filename
= urldigest
+ext
939 self
.auto_filename
= 1
941 return urldigest_filename
944 # We only want to check if the file exists, so don't try to
945 # rename the file, even if it would be reasonable. See also:
946 # http://bugs.gpodder.org/attachment.cgi?id=236
948 if self
.filename
is None:
951 return os
.path
.join(self
.channel
.save_dir
, self
.filename
)
953 if self
.filename
is None or force_update
or (self
.auto_filename
and self
.filename
== urldigest
+ext
):
954 # Try to find a new filename for the current file
955 if template
is not None:
956 # If template is specified, trust the template's extension
957 episode_filename
, ext
= os
.path
.splitext(template
)
959 episode_filename
, extension_UNUSED
= util
.filename_from_url(self
.url
)
960 fn_template
= util
.sanitize_filename(episode_filename
, self
.MAX_FILENAME_LENGTH
)
962 if 'redirect' in fn_template
and template
is None:
963 # This looks like a redirection URL - force URL resolving!
964 log('Looks like a redirection to me: %s', self
.url
, sender
=self
)
965 url
= util
.get_real_url(self
.channel
.authenticate_url(self
.url
))
966 log('Redirection resolved to: %s', url
, sender
=self
)
967 (episode_filename
, extension_UNUSED
) = util
.filename_from_url(url
)
968 fn_template
= util
.sanitize_filename(episode_filename
, self
.MAX_FILENAME_LENGTH
)
970 # Use the video title for YouTube downloads
971 for yt_url
in ('http://youtube.com/', 'http://www.youtube.com/'):
972 if self
.url
.startswith(yt_url
):
973 fn_template
= util
.sanitize_filename(os
.path
.basename(self
.title
), self
.MAX_FILENAME_LENGTH
)
975 # If the basename is empty, use the md5 hexdigest of the URL
976 if len(fn_template
) == 0 or fn_template
.startswith('redirect.'):
977 log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self
.channel
.url
, self
.url
, sender
=self
)
978 fn_template
= urldigest
980 # Find a unique filename for this episode
981 wanted_filename
= self
.find_unique_file_name(self
.url
, fn_template
, ext
)
983 # We populate the filename field the first time - does the old file still exist?
984 if self
.filename
is None and os
.path
.exists(os
.path
.join(self
.channel
.save_dir
, urldigest
+ext
)):
985 log('Found pre-0.15.0 downloaded file: %s', urldigest
, sender
=self
)
986 self
.filename
= urldigest
+ext
988 # The old file exists, but we have decided to want a different filename
989 if self
.filename
is not None and wanted_filename
!= self
.filename
:
990 # there might be an old download folder crawling around - move it!
991 new_file_name
= os
.path
.join(self
.channel
.save_dir
, wanted_filename
)
992 old_file_name
= os
.path
.join(self
.channel
.save_dir
, self
.filename
)
993 if os
.path
.exists(old_file_name
) and not os
.path
.exists(new_file_name
):
994 log('Renaming %s => %s', old_file_name
, new_file_name
, sender
=self
)
995 os
.rename(old_file_name
, new_file_name
)
996 elif force_update
and not os
.path
.exists(old_file_name
):
997 # When we call force_update, the file might not yet exist when we
998 # call it from the downloading code before saving the file
999 log('Choosing new filename: %s', new_file_name
, sender
=self
)
1001 log('Warning: %s exists or %s does not.', new_file_name
, old_file_name
, sender
=self
)
1002 log('Updating filename of %s to "%s".', self
.url
, wanted_filename
, sender
=self
)
1003 elif self
.filename
is None:
1004 log('Setting filename to "%s".', wanted_filename
, sender
=self
)
1006 log('Should update filename. Stays the same (%s). Good!', \
1007 wanted_filename
, sender
=self
)
1008 self
.filename
= wanted_filename
1012 return os
.path
.join(self
.channel
.save_dir
, self
.filename
)
def set_mimetype(self, mimetype, commit=False):
    """Sets the mimetype for this episode.

    If commit is True, the change is persisted to the database
    immediately; otherwise only the in-memory attribute is updated.
    """
    self.mimetype = mimetype
    # Fix: the 'commit' parameter was previously accepted but ignored
    if commit:
        self.db.commit()
def extension(self, may_call_local_filename=True):
    """Return the file extension for this episode (e.g. '.mp3').

    The extension is taken from the episode URL; if
    may_call_local_filename is True and a local filename exists,
    that filename's extension takes precedence.  When no usable
    extension can be derived, fall back to one based on the
    episode's mimetype.
    """
    filename, ext = util.filename_from_url(self.url)
    if may_call_local_filename:
        filename = self.local_filename(create=False)
        if filename is not None:
            filename, ext = os.path.splitext(filename)
    # if we can't detect the extension from the url fallback on the mimetype
    if ext == '' or util.file_type_by_extension(ext) is None:
        ext = util.extension_from_mimetype(self.mimetype)
    # Fix: the computed extension was never returned
    return ext
def check_is_new(self, downloading=lambda e: False):
    """
    Returns True if this episode is to be considered new.
    "Downloading" should be a callback that gets an episode
    as its parameter and returns True if the episode is
    being downloaded at the moment.
    """
    # An episode is new when it is in the normal state, not yet
    # played, and not currently being downloaded.
    return self.state == gpodder.STATE_NORMAL and \
            not self.is_played and \
            not downloading(self)
def mark_new(self):
    """Reset this episode to the 'new' (normal, unplayed) state.

    NOTE(review): the 'def' line was missing from the extracted
    source; the method name is inferred from the inverse operation
    (mark_old) — confirm against upstream.
    """
    self.state = gpodder.STATE_NORMAL
    self.is_played = False
    # Persist the state change
    self.db.update_episode_state(self)
def mark_old(self):
    """Mark this episode as played and persist the change.

    NOTE(review): the 'def' line was missing from the extracted
    source; name inferred from context — confirm against upstream.
    """
    self.is_played = True
    self.db.update_episode_state(self)
def file_exists(self):
    """Return True if the episode's downloaded file exists on disk.

    Returns False when no local filename can be determined at all.
    """
    filename = self.local_filename(create=False, check_only=True)
    if filename is None:
        # Fix: missing early return — no filename means no file
        return False
    return os.path.exists(filename)
def was_downloaded(self, and_exists=False):
    """Return True if this episode is in the downloaded state.

    If and_exists is True, additionally require that the downloaded
    file still exists on disk.
    """
    if self.state != gpodder.STATE_DOWNLOADED:
        # Fix: missing return — non-downloaded states are False
        return False
    if and_exists and not self.file_exists():
        # Downloaded according to the database, but the file is gone
        return False
    return True
def sync_filename(self, use_custom=False, custom_format=None):
    """Return the display filename used when syncing to a device.

    When use_custom is True, custom_format is expanded with the
    episode and its podcast via util.object_string_formatter;
    otherwise the episode title is used.

    NOTE(review): the guard and fallback lines were missing from the
    extracted source ('use_custom' was otherwise unused) — confirm
    the fallback value against upstream.
    """
    if use_custom:
        return util.object_string_formatter(custom_format,
                episode=self, podcast=self.channel)
    return self.title
def file_type(self):
    """Return this episode's media type ('audio', 'video', ...)."""
    # Assume all YouTube links are video files
    if youtube.is_video_link(self.url):
        # Fix: the YouTube branch had no body — it must short-circuit
        return 'video'
    return util.file_type_by_extension(self.extension())
def basename(self):
    """Return the URL's file name with its extension stripped."""
    name = os.path.basename(self.url)
    root, _ext = os.path.splitext(name)
    return root
def published(self):
    """
    Returns published date as YYYYMMDD (or 00000000 if not available)

    NOTE(review): upstream may decorate this with @property — the
    decorator line was not present in the extracted source; confirm.
    """
    # Fix: restored try/except and fallback — the docstring promises
    # '00000000' when pubDate cannot be formatted
    try:
        return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
    except Exception:
        log( 'Cannot format pubDate for "%s".', self.title, sender=self)
        return '00000000'
def pubtime(self):
    """
    Returns published time as HHMM (or 0000 if not available)

    NOTE(review): the 'def' line and docstring delimiters were missing
    from the extracted source; name inferred from the docstring and
    log message — confirm against upstream.
    """
    try:
        return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
    except Exception:
        log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
        return '0000'
def cute_pubdate(self):
    """Return a human-readable publication date string.

    Falls back to a localized '(unknown)' when the date cannot be
    formatted.
    """
    result = util.format_date(self.pubDate)
    # Fix: 'result' was computed then unconditionally discarded —
    # only fall back to '(unknown)' when formatting failed
    if result is None:
        return '(%s)' % _('unknown')
    return result

# Property alias used by UI code
pubdate_prop = property(fget=cute_pubdate)
def calculate_filesize(self):
    """Determine the episode's file size from its local file.

    Stores the size in self.length; logs (but does not raise) when
    the size cannot be determined.
    """
    filename = self.local_filename(create=False)
    if filename is None:
        # Fix: message typo ('filesized' -> 'filesize')
        log('calculate_filesize called, but filename is None!', sender=self)
    # Fix: restored try/except — getsize raises when filename is
    # None or the file is missing, and the error log below is the
    # intended handler for that case
    try:
        self.length = os.path.getsize(filename)
    except Exception:
        log( 'Could not get filesize for %s.', self.url)
def get_filesize_string(self):
    """Return this episode's length formatted as a human-readable size."""
    return util.format_filesize(self.length)

# Property alias used by UI code
filesize_prop = property(fget=get_filesize_string)
def get_played_string(self):
    """Return a localized 'Unplayed' marker, or '' for played episodes."""
    if not self.is_played:
        return _('Unplayed')
    # Fix: restored fall-through return for the played case
    # (NOTE(review): fallback value inferred — confirm against upstream)
    return ''

# Property alias used by UI code
played_prop = property(fget=get_played_string)
def is_duplicate(self, episode):
    """Return True if this episode looks like a duplicate of 'episode'.

    Two episodes are considered duplicates when both the title and
    the publication date match exactly.
    """
    if self.title == episode.title and self.pubDate == episode.pubDate:
        log('Possible duplicate detected: %s', self.title)
        # Fix: the predicate never returned a value
        return True
    return False
def duplicate_id(self):
    """Return a hash identifying this episode by title and publish date."""
    identity = (self.title, self.pubDate)
    return hash(identity)
def update_from(self, episode):
    """Copy the core metadata fields from another episode onto this one."""
    copied_fields = ('title', 'url', 'description', 'link', 'pubDate', 'guid')
    for field in copied_fields:
        setattr(self, field, getattr(episode, field))