1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # gpodder.model - Core model classes for gPodder (2009-08-13)
23 # Based on libpodcasts.py (thp, 2005-10-29)
27 from gpodder
import util
28 from gpodder
import feedcore
29 from gpodder
import youtube
30 from gpodder
import corestats
32 from gpodder
.liblogger
import log
45 import xml
.sax
.saxutils
class CustomFeed(feedcore.ExceptionWithData):
    # Raised by gPodderFetcher.fetch_channel when a registered custom
    # handler claims a URL; the handler's feed object travels as .data
    # and is consumed in PodcastChannel.update().
    pass
class gPodderFetcher(feedcore.Fetcher):
    """
    This class extends the feedcore Fetcher with the gPodder User-Agent and the
    Proxy handler based on the current settings in gPodder and provides a
    convenience method (fetch_channel) for use by PodcastChannel objects.
    """
    # NOTE(review): reconstructed from damaged source — register() appends
    # handlers here and fetch_channel() iterates them.
    custom_handlers = []

    def __init__(self):
        # NOTE(review): 'def __init__' line reconstructed; the call below was visible
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        """Fetch a channel's feed, honoring ETag/Last-Modified caching."""
        etag = channel.etag  # NOTE(review): reconstructed — 'etag' was undefined in damaged source
        modified = feedparser._parse_date(channel.last_modified)
        # If we have a username or password, rebuild the url with them included
        # Note: using a HTTPBasicAuthHandler would be pain because we need to
        # know the realm. It can be done, but I think this method works, too
        url = channel.authenticate_url(channel.url)
        for handler in self.custom_handlers:
            custom_feed = handler.handle_url(url)
            if custom_feed is not None:
                # A custom handler takes over; signal via exception
                raise CustomFeed(custom_feed)
        self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        # Map YouTube channel pages to their real feed URL
        return youtube.get_real_channel_url(url)

    @classmethod
    def register(cls, handler):
        # NOTE(review): @classmethod reconstructed — register is called on the class below
        cls.custom_handlers.append(handler)

    # def _get_handlers(self):
    #    # Add a ProxyHandler for fetching data via a proxy server
    #    proxies = {'http': 'http://proxy.example.org:8080'}
    #    return[urllib2.ProxyHandler(proxies))]

# The "register" method is exposed here for external usage
register_custom_handler = gPodderFetcher.register
class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        o = cls(*args)  # NOTE(review): construction/return reconstructed from damaged source
        o.update_from_dict(d)
        return o

    def update_from_dict(self, d):
        """
        Updates the attributes of this object with values from the
        dictionary "d" by using the keys found in "d".
        """
        for k in d:
            setattr(self, k, d[k])
class PodcastChannel(PodcastModelObject):
    """holds data for a complete channel"""
    # Hard cap for generated download-folder names (filesystem safety)
    MAX_FOLDERNAME_LENGTH = 150

    # Shared fetcher instance used by update() for all channels
    feed_fetcher = gPodderFetcher()
@classmethod
def build_factory(cls, download_dir):
    """Return a factory callable binding *download_dir* for DB row hydration."""
    def factory(dict, db):
        return cls.create_from_dict(dict, db, download_dir)
    return factory  # NOTE(review): return reconstructed from damaged source

@classmethod
def load_from_db(cls, db, download_dir):
    """Load all channels from the database using this class as factory."""
    return db.load_channels(factory=cls.build_factory(download_dir))
@classmethod
def load(cls, db, url, create=True, authentication_tokens=None,\
        max_episodes=0, download_dir=None, allow_empty_feeds=False):
    """Return the channel for *url*, optionally subscribing to it first.

    NOTE(review): control flow reconstructed from damaged source — verify
    against upstream gPodder before relying on edge cases.
    """
    if isinstance(url, unicode):
        url = url.encode('utf-8')

    tmp = db.load_channels(factory=cls.build_factory(download_dir), url=url)
    if len(tmp):
        return tmp[0]
    elif create:
        tmp = PodcastChannel(db, download_dir)
        tmp.url = url
        if authentication_tokens is not None:
            tmp.username = authentication_tokens[0]
            tmp.password = authentication_tokens[1]

        tmp.update(max_episodes)
        tmp.save()
        db.force_last_new(tmp)
        # Subscribing to empty feeds should yield an error (except if
        # the user specifically allows empty feeds in the config UI)
        if sum(tmp.get_statistics()) == 0 and not allow_empty_feeds:
            tmp.delete()
            raise Exception(_('No downloadable episodes in feed'))
        return tmp
def episode_factory(self, d, db__parameter_is_unused=None):
    """
    This function takes a dictionary containing key-value pairs for
    episodes and returns a new PodcastEpisode object that is connected
    to this PodcastChannel object.

    Returns: A new PodcastEpisode object
    """
    return PodcastEpisode.create_from_dict(d, self)
def _consume_custom_feed(self, custom_feed, max_episodes=0):
    """Ingest channel metadata and new episodes from a CustomFeed handler."""
    self.title = custom_feed.get_title()
    self.link = custom_feed.get_link()
    self.description = custom_feed.get_description()
    self.image = custom_feed.get_image()
    self.pubDate = time.time()
    self.save()  # NOTE(review): save() call reconstructed from damaged source

    guids = [episode.guid for episode in self.get_all_episodes()]
    self.count_new += custom_feed.get_new_episodes(self, guids)
    self.save()  # NOTE(review): reconstructed

    # Keep at most max_episodes in the database
    self.db.purge(max_episodes, self.id)
183 def _consume_updated_feed(self
, feed
, max_episodes
=0):
184 self
.parse_error
= feed
.get('bozo_exception', None)
186 self
.title
= feed
.feed
.get('title', self
.url
)
187 self
.link
= feed
.feed
.get('link', self
.link
)
188 self
.description
= feed
.feed
.get('subtitle', self
.description
)
189 # Start YouTube-specific title FIX
190 YOUTUBE_PREFIX
= 'Uploads by '
191 if self
.title
.startswith(YOUTUBE_PREFIX
):
192 self
.title
= self
.title
[len(YOUTUBE_PREFIX
):] + ' on YouTube'
193 # End YouTube-specific title FIX
196 self
.pubDate
= rfc822
.mktime_tz(feed
.feed
.get('updated_parsed', None+(0,)))
198 self
.pubDate
= time
.time()
200 if hasattr(feed
.feed
, 'image'):
201 if hasattr(feed
.feed
.image
, 'href') and feed
.feed
.image
.href
:
203 self
.image
= feed
.feed
.image
.href
207 # Load all episodes to update them properly.
208 existing
= self
.get_all_episodes()
210 # We can limit the maximum number of entries that gPodder will parse
211 if max_episodes
> 0 and len(feed
.entries
) > max_episodes
:
212 entries
= feed
.entries
[:max_episodes
]
214 entries
= feed
.entries
216 # Search all entries for new episodes
217 for entry
in entries
:
221 episode
= PodcastEpisode
.from_feedparser_entry(entry
, self
)
223 log('Cannot instantiate episode "%s": %s. Skipping.', entry
.get('id', '(no id available)'), e
, sender
=self
, traceback
=True)
229 if ex
.guid
== episode
.guid
or episode
.is_duplicate(ex
):
230 for k
in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
231 setattr(ex
, k
, getattr(episode
, k
))
237 # Remove "unreachable" episodes - episodes that have not been
238 # downloaded and that the feed does not list as downloadable anymore
239 if self
.id is not None:
240 seen_guids
= set(e
.guid
for e
in feed
.entries
if hasattr(e
, 'guid'))
241 episodes_to_purge
= (e
for e
in existing
if \
242 e
.state
!= gpodder
.STATE_DOWNLOADED
and \
243 e
.guid
not in seen_guids
and e
.guid
is not None)
244 for episode
in episodes_to_purge
:
245 log('Episode removed from feed: %s (%s)', episode
.title
, \
246 episode
.guid
, sender
=self
)
247 self
.db
.delete_episode_by_guid(episode
.guid
, self
.id)
249 # This *might* cause episodes to be skipped if there were more than
250 # max_episodes_per_feed items added to the feed between updates.
251 # The benefit is that it prevents old episodes from apearing as new
252 # in certain situations (see bug #340).
253 self
.db
.purge(max_episodes
, self
.id)
def update_channel_lock(self):
    """Persist this channel's lock flag to the database."""
    self.db.update_channel_lock(self)

def _update_etag_modified(self, feed):
    """Record fetch time and the feed's HTTP caching headers (ETag / Last-Modified)."""
    self.updated_timestamp = time.time()
    self.calculate_publish_behaviour()
    # Keep the previous values when the server sent no caching headers
    self.etag = feed.headers.get('etag', self.etag)
    self.last_modified = feed.headers.get('last-modified', self.last_modified)
def query_automatic_update(self):
    """Query if this channel should be updated automatically

    Returns True if the update should happen in automatic
    mode or False if this channel should be skipped (timeout
    not yet reached or release not expected right now).
    """
    updated = self.updated_timestamp
    expected = self.release_expected

    # BUG FIX: 'now' was used but never assigned in the damaged source
    now = time.time()
    one_day_ago = now - 60*60*24
    lastcheck = now - 60*10

    # Update if stale for a day, or if a release is expected and the
    # last check was more than ten minutes ago
    return updated < one_day_ago or \
            (expected < now and updated < lastcheck)
281 def update(self
, max_episodes
=0):
283 self
.feed_fetcher
.fetch_channel(self
)
284 except CustomFeed
, updated
:
285 custom_feed
= updated
.data
286 self
._consume
_custom
_feed
(custom_feed
, max_episodes
)
288 except feedcore
.UpdatedFeed
, updated
:
290 self
._consume
_updated
_feed
(feed
, max_episodes
)
291 self
._update
_etag
_modified
(feed
)
293 except feedcore
.NewLocation
, updated
:
296 self
._consume
_updated
_feed
(feed
, max_episodes
)
297 self
._update
_etag
_modified
(feed
)
299 except feedcore
.NotModified
, updated
:
301 self
._update
_etag
_modified
(feed
)
304 # "Not really" errors
305 #feedcore.AuthenticationRequired
309 #feedcore.InternalServerError
312 #feedcore.Unsubscribe
314 #feedcore.InvalidFeed
315 #feedcore.UnknownStatusCode
def delete(self, purge=True):
    """Remove this channel (and, if *purge*, its episodes) from the database."""
    self.db.delete_channel(self, purge)

def save(self):
    # NOTE(review): 'def save' line reconstructed — damaged source showed only the body
    self.db.save_channel(self)
def get_statistics(self):
    """Return the per-channel episode count tuple from the database.

    For a channel not yet saved (no id) a zero tuple is returned.
    """
    if self.id is None:
        # NOTE(review): guard reconstructed from damaged source
        return (0, 0, 0, 0, 0)
    else:
        return self.db.get_channel_count(int(self.id))
def authenticate_url(self, url):
    """Return *url* with this channel's username/password applied via util.url_add_authentication."""
    return util.url_add_authentication(url, self.username, self.password)
def __init__(self, db, download_dir):
    """Initialize an unsaved channel bound to *db* and *download_dir*.

    NOTE(review): many attribute defaults reconstructed from damaged
    source — verify against upstream gPodder.
    """
    self.db = db
    self.download_dir = download_dir
    self.id = None
    self.url = None
    self.title = ''
    self.link = ''
    self.description = ''
    self.image = None
    self.pubDate = 0
    self.parse_error = None
    self.newest_pubdate_cached = None
    self.foldername = None
    self.auto_foldername = 1 # automatically generated foldername

    # should this channel be synced to devices? (ex: iPod)
    self.sync_to_devices = True
    # to which playlist should be synced
    self.device_playlist_name = 'gPodder'
    # if set, this overrides the channel-provided title
    self.override_title = ''

    self.username = ''
    self.password = ''

    self.last_modified = None
    self.etag = None

    self.save_dir_size = 0
    self.__save_dir_size_set = False

    self.count_downloaded = 0
    self.count_new = 0
    self.count_unplayed = 0

    self.channel_is_locked = False

    self.release_expected = time.time()
    self.release_deviation = 0
    self.updated_timestamp = 0
def calculate_publish_behaviour(self):
    """Estimate when the next episode release is expected from pubDate deltas."""
    episodes = self.db.load_episodes(self, factory=self.episode_factory, limit=30)
    if len(episodes) < 3:
        # Too few data points to estimate anything
        return

    deltas = []  # NOTE(review): init reconstructed from damaged source
    latest = max(e.pubDate for e in episodes)
    for index in range(len(episodes)-1):
        if episodes[index].pubDate != 0 and episodes[index+1].pubDate != 0:
            deltas.append(episodes[index].pubDate - episodes[index+1].pubDate)

    if len(deltas) > 1:
        # NOTE(review): branch condition reconstructed
        stats = corestats.Stats(deltas)
        self.release_expected = min([latest+stats.stdev(), latest+(stats.min()+stats.avg())*.5])
        self.release_deviation = stats.stdev()
    else:
        self.release_expected = latest
        self.release_deviation = 0
def request_save_dir_size(self):
    """Compute the download-folder size once; later calls reuse the cached value."""
    if not self.__save_dir_size_set:
        self.update_save_dir_size()
    self.__save_dir_size_set = True

def update_save_dir_size(self):
    """Recalculate the disk usage of this channel's download folder."""
    self.save_dir_size = util.calculate_size(self.save_dir)
def get_title( self):
    """Return the display title: override wins; fall back to URL if the feed title is blank."""
    if self.override_title:
        return self.override_title
    elif not self.__title.strip():
        # NOTE(review): fallback branches reconstructed from damaged source
        return self.url
    else:
        return self.__title

def set_title( self, value):
    """Set the feed-provided title (stripped)."""
    self.__title = value.strip()

# NOTE(review): fset reconstructed — the damaged source lost the closing arguments
title = property(fget=get_title, fset=set_title)
def set_custom_title( self, custom_title):
    """Apply a user-chosen title, renaming the download folder when sensible.

    NOTE(review): early returns and save() calls reconstructed from damaged source.
    """
    custom_title = custom_title.strip()

    # if the custom title is the same as we have
    if custom_title == self.override_title:
        return

    # if custom title is the same as channel title and we didn't have a custom title
    if custom_title == self.__title and self.override_title == '':
        return

    # make sure self.foldername is initialized
    self.get_save_dir()

    # rename folder if custom_title looks sane
    new_folder_name = self.find_unique_folder_name(custom_title)
    if len(new_folder_name) > 0 and new_folder_name != self.foldername:
        log('Changing foldername based on custom title: %s', custom_title, sender=self)
        new_folder = os.path.join(self.download_dir, new_folder_name)
        old_folder = os.path.join(self.download_dir, self.foldername)
        if os.path.exists(old_folder):
            if not os.path.exists(new_folder):
                # Old folder exists, new folder does not -> simply rename
                log('Renaming %s => %s', old_folder, new_folder, sender=self)
                os.rename(old_folder, new_folder)
            else:
                # Both folders exist -> move files and delete old folder
                log('Moving files from %s to %s', old_folder, new_folder, sender=self)
                for file in glob.glob(os.path.join(old_folder, '*')):
                    shutil.move(file, new_folder)
                log('Removing %s', old_folder, sender=self)
                shutil.rmtree(old_folder, ignore_errors=True)
        self.foldername = new_folder_name
        self.save()

    if custom_title != self.__title:
        self.override_title = custom_title
    else:
        self.override_title = ''
def get_downloaded_episodes(self):
    """Return all episodes of this channel in the downloaded state."""
    return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)

def get_new_episodes(self, downloading=lambda e: False):
    """
    Get a list of new episodes. You can optionally specify
    "downloading" as a callback that takes an episode as
    a parameter and returns True if the episode is currently
    being downloaded or False if not.

    By default, "downloading" is implemented so that it
    reports all episodes as not downloading.
    """
    return [episode for episode in self.db.load_episodes(self,
            factory=self.episode_factory) if
            episode.check_is_new(downloading=downloading)]

def get_playlist_filename(self):
    # If the save_dir doesn't end with a slash (which it really should
    # not, if the implementation is correct, we can just append .m3u :)
    assert self.save_dir[-1] != '/'
    return self.save_dir+'.m3u'
def update_m3u_playlist(self):
    """(Re)write this channel's .m3u playlist of downloaded episodes.

    NOTE(review): early return, '#EXTM3U' header write and f.close()
    reconstructed from damaged source.
    """
    m3u_filename = self.get_playlist_filename()

    downloaded_episodes = self.get_downloaded_episodes()
    if not downloaded_episodes:
        log('No episodes - removing %s', m3u_filename, sender=self)
        util.delete_file(m3u_filename)
        return

    log('Writing playlist to %s', m3u_filename, sender=self)
    f = open(m3u_filename, 'w')
    f.write('#EXTM3U\n')

    # Sort downloaded episodes by publication date, ascending
    def older(episode_a, episode_b):
        return cmp(episode_a.pubDate, episode_b.pubDate)

    for episode in sorted(downloaded_episodes, cmp=older):
        if episode.was_downloaded(and_exists=True):
            filename = episode.local_filename(create=False)
            assert filename is not None

            # Use a relative path when the file lives under the playlist's parent
            if os.path.dirname(filename).startswith(os.path.dirname(m3u_filename)):
                filename = filename[len(os.path.dirname(m3u_filename)+os.sep):]
            f.write('#EXTINF:0,'+self.title+' - '+episode.title+' ('+episode.cute_pubdate()+')\n')
            f.write(filename+'\n')

    f.close()
def addDownloadedItem(self, item):
    """Mark *item* downloaded (if not already) and refresh the M3U playlist."""
    log('addDownloadedItem(%s)', item.url)

    if not item.was_downloaded():
        item.mark_downloaded(save=True)
        self.update_m3u_playlist()

def get_all_episodes(self):
    """Return every episode of this channel from the database."""
    return self.db.load_episodes(self, factory=self.episode_factory)
def find_unique_folder_name(self, foldername):
    """Return a sanitized folder name not claimed by another channel.

    NOTE(review): loop structure reconstructed from damaged source.
    """
    # Remove trailing dots to avoid errors on Windows (bug 600)
    foldername = foldername.strip().rstrip('.')

    current_try = util.sanitize_filename(foldername, \
            self.MAX_FOLDERNAME_LENGTH)
    next_try_id = 2

    while True:
        if not os.path.exists(os.path.join(self.download_dir, current_try)):
            # Reclaim folder names of already-deleted channels
            self.db.remove_foldername_if_deleted_channel(current_try)

        if self.db.channel_foldername_exists(current_try):
            current_try = '%s (%d)' % (foldername, next_try_id)
            next_try_id += 1
        else:
            return current_try
def get_save_dir(self):
    """Return (creating/migrating as needed) this channel's download folder.

    NOTE(review): save() call and final return reconstructed from damaged source.
    """
    urldigest = hashlib.md5(self.url).hexdigest()
    sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
    if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
        # we must change the folder name, because it has not been set manually
        fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)

        # if this is an empty string, try the basename
        if len(fn_template) == 0:
            log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
            fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)

        # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
        if len(fn_template) == 0:
            log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
            fn_template = urldigest # no need for sanitize_filename here

        # Find a unique folder name for this podcast
        wanted_foldername = self.find_unique_folder_name(fn_template)

        # if the foldername has not been set, check if the (old) md5 filename exists
        if self.foldername is None and os.path.exists(os.path.join(self.download_dir, urldigest)):
            log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
            self.foldername = urldigest

        # we have a valid, new folder name in "current_try" -> use that!
        if self.foldername is not None and wanted_foldername != self.foldername:
            # there might be an old download folder crawling around - move it!
            new_folder_name = os.path.join(self.download_dir, wanted_foldername)
            old_folder_name = os.path.join(self.download_dir, self.foldername)
            if os.path.exists(old_folder_name):
                if not os.path.exists(new_folder_name):
                    # Old folder exists, new folder does not -> simply rename
                    log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
                    os.rename(old_folder_name, new_folder_name)
                else:
                    # Both folders exist -> move files and delete old folder
                    log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
                    for file in glob.glob(os.path.join(old_folder_name, '*')):
                        shutil.move(file, new_folder_name)
                    log('Removing %s', old_folder_name, sender=self)
                    shutil.rmtree(old_folder_name, ignore_errors=True)
        log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
        self.foldername = wanted_foldername
        self.save()

    save_dir = os.path.join(self.download_dir, self.foldername)

    # Create save_dir if it does not yet exist
    if not util.make_directory( save_dir):
        log( 'Could not create save_dir: %s', save_dir, sender = self)

    return save_dir
# Download folder path for this channel (computed; may migrate old folders)
save_dir = property(fget=get_save_dir)

def remove_downloaded( self):
    """Delete this channel's entire download folder from disk (errors ignored)."""
    shutil.rmtree( self.save_dir, True)
# NOTE(review): a decorator (e.g. @property) may have been lost from the
# damaged source immediately before this def — verify against upstream.
def cover_file(self):
    """Return the path of the channel cover image, migrating legacy names to folder.jpg."""
    new_name = os.path.join(self.save_dir, 'folder.jpg')
    if not os.path.exists(new_name):
        old_names = ('cover', '.cover')
        for old_name in old_names:
            filename = os.path.join(self.save_dir, old_name)
            if os.path.exists(filename):
                shutil.move(filename, new_name)
                return new_name

    # NOTE(review): final return reconstructed from damaged source
    return new_name
def delete_episode_by_url(self, url):
    """Delete the downloaded file for the episode at *url* and mark it deleted."""
    episode = self.db.load_episode(url, factory=self.episode_factory)

    if episode is not None:
        filename = episode.local_filename(create=False)
        if filename is not None:
            util.delete_file(filename)
        else:
            # NOTE(review): else-branch reconstructed from damaged source
            log('Cannot delete episode: %s (I have no filename!)', episode.title, sender=self)
        episode.set_state(gpodder.STATE_DELETED)

    self.update_m3u_playlist()
class PodcastEpisode(PodcastModelObject):
    """holds data for one object in a channel"""
    # Hard cap for generated episode file names (filesystem safety)
    MAX_FILENAME_LENGTH = 200
def reload_from_db(self):
    """
    Re-reads all episode details for this object from the
    database and updates this object accordingly. Can be
    used to refresh existing objects when the database has
    been updated (e.g. the filename has been set after a
    download where it was not set before the download)
    """
    d = self.db.load_episode(self.url)
    if d is not None:
        # NOTE(review): guard and return reconstructed from damaged source
        self.update_from_dict(d)

    return self

def has_website_link(self):
    """Return True if the episode has a web link distinct from its media URL."""
    return bool(self.link) and (self.link != self.url)
644 def from_feedparser_entry( entry
, channel
):
645 episode
= PodcastEpisode( channel
)
647 episode
.title
= entry
.get( 'title', util
.get_first_line( util
.remove_html_tags( entry
.get( 'summary', ''))))
648 episode
.link
= entry
.get( 'link', '')
649 episode
.description
= ''
651 # Get the episode description (prefer summary, then subtitle)
652 for key
in ('summary', 'subtitle', 'link'):
654 episode
.description
= entry
[key
]
655 if episode
.description
:
658 episode
.guid
= entry
.get( 'id', '')
659 if entry
.get( 'updated_parsed', None):
660 episode
.pubDate
= rfc822
.mktime_tz(entry
.updated_parsed
+(0,))
662 if episode
.title
== '':
663 log( 'Warning: Episode has no title, adding anyways.. (Feed Is Buggy!)', sender
= episode
)
666 if hasattr(entry
, 'enclosures') and len(entry
.enclosures
) > 0:
667 enclosure
= entry
.enclosures
[0]
668 if len(entry
.enclosures
) > 1:
669 for e
in entry
.enclosures
:
670 if hasattr( e
, 'href') and hasattr( e
, 'length') and hasattr( e
, 'type') and (e
.type.startswith('audio/') or e
.type.startswith('video/')):
671 if util
.normalize_feed_url(e
.href
) is not None:
672 log( 'Selected enclosure: %s', e
.href
, sender
= episode
)
675 episode
.url
= util
.normalize_feed_url( enclosure
.get( 'href', ''))
676 elif hasattr(entry
, 'media_content'):
677 media
= getattr(entry
, 'media_content')
679 if 'url' in m
and 'type' in m
and (m
['type'].startswith('audio/') or m
['type'].startswith('video/')):
680 if util
.normalize_feed_url(m
['url']) is not None:
681 log('Selected media_content: %s', m
['url'], sender
= episode
)
682 episode
.url
=util
.normalize_feed_url(m
['url'])
683 episode
.mimetype
=m
['type']
685 episode
.length
=int(m
['fileSize'])
687 elif hasattr(entry
, 'links'):
688 for link
in entry
.links
:
689 if not hasattr(link
, 'href'):
692 # YouTube-specific workaround
693 if youtube
.is_video_link(link
.href
):
694 episode
.url
= link
.href
697 # Check if we can resolve this link to a audio/video file
698 filename
, extension
= util
.filename_from_url(link
.href
)
699 file_type
= util
.file_type_by_extension(extension
)
700 if file_type
is None and hasattr(link
, 'type'):
701 extension
= util
.extension_from_mimetype(link
.type)
702 file_type
= util
.file_type_by_extension(extension
)
704 # The link points to a audio or video file - use it!
705 if file_type
is not None:
706 log('Adding episode with link to file type "%s".', \
707 file_type
, sender
=episode
)
708 episode
.url
= link
.href
711 # Still no luck finding an episode? Try to forcefully scan the
712 # HTML/plaintext contents of the entry for MP3 links
714 mp3s
= re
.compile(r
'http://[^"]*\.mp3')
715 for content
in entry
.get('content', []):
717 for match
in mp3s
.finditer(html
):
718 episode
.url
= match
.group(0)
724 # This item in the feed has no downloadable enclosure
728 if not episode
.pubDate
:
729 metainfo
= util
.get_episode_info_from_url(episode
.url
)
730 if 'pubdate' in metainfo
:
732 episode
.pubDate
= int(float(metainfo
['pubdate']))
734 log('Cannot convert pubDate "%s" in from_feedparser_entry.', str(metainfo
['pubdate']), traceback
=True)
736 if hasattr(enclosure
, 'length'):
738 episode
.length
= int(enclosure
.length
)
739 if episode
.length
== 0:
740 raise ValueError('Zero-length is not acceptable')
741 except ValueError, ve
:
742 log('Invalid episode length: %s (%s)', enclosure
.length
, ve
.message
)
745 if hasattr( enclosure
, 'type'):
746 episode
.mimetype
= enclosure
.type
748 if episode
.title
== '':
749 ( filename
, extension
) = os
.path
.splitext( os
.path
.basename( episode
.url
))
750 episode
.title
= filename
def __init__(self, channel):
    """Initialize an unsaved episode belonging to *channel*.

    NOTE(review): several attribute defaults reconstructed from damaged source.
    """
    self.db = channel.db  # NOTE(review): reconstructed — save() below uses self.db
    # Used by Storage for faster saving
    self.id = None
    self.url = ''
    self.title = ''
    self.length = 0
    self.mimetype = 'application/octet-stream'
    self.guid = ''
    self.description = ''
    self.link = ''
    self.channel = channel
    self.pubDate = 0
    self.filename = None
    self.auto_filename = 1 # automatically generated filename

    self.state = gpodder.STATE_NORMAL
    self.is_played = False
    self.is_locked = channel.channel_is_locked

def save(self):
    # NOTE(review): 'def save' line reconstructed — the body below was visible
    if self.state != gpodder.STATE_DOWNLOADED and self.file_exists():
        self.state = gpodder.STATE_DOWNLOADED
    self.db.save_episode(self)
def set_state(self, state):
    """Set the episode state and persist it immediately."""
    self.state = state  # NOTE(review): assignment reconstructed from damaged source
    self.db.mark_episode(self.url, state=self.state, is_played=self.is_played, is_locked=self.is_locked)

def mark(self, state=None, is_played=None, is_locked=None):
    """Update any subset of state/played/locked flags and persist them."""
    if state is not None:
        self.state = state  # NOTE(review): reconstructed
    if is_played is not None:
        self.is_played = is_played
    if is_locked is not None:
        self.is_locked = is_locked
    self.db.mark_episode(self.url, state=state, is_played=is_played, is_locked=is_locked)

def mark_downloaded(self, save=False):
    """Mark this episode downloaded and unplayed; optionally save+commit."""
    self.state = gpodder.STATE_DOWNLOADED
    self.is_played = False
    if save:
        # NOTE(review): save/commit reconstructed from damaged source
        self.save()
        self.db.commit()
# NOTE(review): a decorator (e.g. @property) may have been lost from the
# damaged source immediately before this def — verify against upstream.
def title_markup(self):
    """Return Pango markup: escaped episode title plus small channel title."""
    return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
            xml.sax.saxutils.escape(self.channel.title))
# NOTE(review): lost decorators (e.g. @property) may have preceded these defs.
def maemo_markup(self):
    """Hildon markup: title, file size, release date and channel title (all escaped)."""
    return ('<b>%s</b>\n<small>%s; '+_('released %s')+ \
            '; '+_('from %s')+'</small>') % (\
            xml.sax.saxutils.escape(self.title), \
            xml.sax.saxutils.escape(self.filesize_prop), \
            xml.sax.saxutils.escape(self.pubdate_prop), \
            xml.sax.saxutils.escape(self.channel.title))

def maemo_remove_markup(self):
    """Hildon markup used in the 'remove episodes' dialog."""
    if self.is_played:
        # NOTE(review): if/else reconstructed from damaged source
        played_string = _('played')
    else:
        played_string = _('unplayed')
    downloaded_string = self.get_age_string()
    if not downloaded_string:
        downloaded_string = _('today')
    return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
            '; '+_('from %s')+'</small>') % (\
            xml.sax.saxutils.escape(self.title), \
            xml.sax.saxutils.escape(self.filesize_prop), \
            xml.sax.saxutils.escape(played_string), \
            xml.sax.saxutils.escape(downloaded_string), \
            xml.sax.saxutils.escape(self.channel.title))
):
832 return util
.file_age_in_days(self
.local_filename(create
=False, \
835 def get_age_string(self
):
836 return util
.file_age_to_string(self
.age_in_days())
838 age_prop
= property(fget
=get_age_string
)
840 def one_line_description( self
):
841 lines
= util
.remove_html_tags(self
.description
).strip().splitlines()
842 if not lines
or lines
[0] == '':
843 return _('No description available')
845 return ' '.join(lines
)
847 def delete_from_disk(self
):
849 self
.channel
.delete_episode_by_url(self
.url
)
851 log('Cannot delete episode from disk: %s', self
.title
, traceback
=True, sender
=self
)
853 def find_unique_file_name(self
, url
, filename
, extension
):
854 current_try
= util
.sanitize_filename(filename
, self
.MAX_FILENAME_LENGTH
)+extension
858 if self
.filename
== current_try
and current_try
is not None:
859 # We already have this filename - good!
862 while self
.db
.episode_filename_exists(current_try
):
863 if next_try_id
== 2 and not youtube
.is_video_link(url
):
864 # If we arrive here, current_try has a collision, so
865 # try to resolve the URL for a better basename
866 log('Filename collision: %s - trying to resolve...', current_try
, sender
=self
)
867 url
= util
.get_real_url(self
.channel
.authenticate_url(url
))
868 episode_filename
, extension_UNUSED
= util
.filename_from_url(url
)
869 current_try
= util
.sanitize_filename(episode_filename
, self
.MAX_FILENAME_LENGTH
)+extension
870 if not self
.db
.episode_filename_exists(current_try
) and current_try
:
871 log('Filename %s is available - collision resolved.', current_try
, sender
=self
)
874 filename
= episode_filename
875 log('Continuing search with %s as basename...', filename
, sender
=self
)
877 current_try
= '%s (%d)%s' % (filename
, next_try_id
, extension
)
def local_filename(self, create, force_update=False, check_only=False,
        template=None):
    """Get (and possibly generate) the local saving filename

    Pass create=True if you want this function to generate a
    new filename if none exists. You only want to do this when
    planning to create/download the file after calling this function.

    Normally, you should pass create=False. This will only
    create a filename when the file already exists from a previous
    version of gPodder (where we used md5 filenames). If the file
    does not exist (and the filename also does not exist), this
    function will return None.

    If you pass force_update=True to this function, it will try to
    find a new (better) filename and move the current file if this
    is the case. This is useful if (during the download) you get
    more information about the file, e.g. the mimetype and you want
    to include this information in the file name generation process.

    If check_only=True is passed to this function, it will never try
    to rename the file, even if would be a good idea. Use this if you
    only want to check if a file exists.

    If "template" is specified, it should be a filename that is to
    be used as a template for generating the "real" filename.

    The generated filename is stored in the database for future access.

    NOTE(review): the 'template' parameter, early returns and the final
    save()/commit() were reconstructed from the damaged source.
    """
    ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')

    # For compatibility with already-downloaded episodes, we
    # have to know md5 filenames if they are downloaded already
    urldigest = hashlib.md5(self.url).hexdigest()

    if not create and self.filename is None:
        urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
        if os.path.exists(urldigest_filename):
            # The file exists, so set it up in our database
            log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
            self.filename = urldigest+ext
            self.auto_filename = 1
            self.save()
            return urldigest_filename
        return None

    if check_only:
        # We only want to check if the file exists, so don't try to
        # rename the file, even if it would be reasonable. See also:
        # http://bugs.gpodder.org/attachment.cgi?id=236
        if self.filename is None:
            return None
        else:
            return os.path.join(self.channel.save_dir, self.filename)

    if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
        # Try to find a new filename for the current file
        if template is not None:
            # If template is specified, trust the template's extension
            episode_filename, ext = os.path.splitext(template)
        else:
            episode_filename, extension_UNUSED = util.filename_from_url(self.url)
        fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

        if 'redirect' in fn_template and template is None:
            # This looks like a redirection URL - force URL resolving!
            log('Looks like a redirection to me: %s', self.url, sender=self)
            url = util.get_real_url(self.channel.authenticate_url(self.url))
            log('Redirection resolved to: %s', url, sender=self)
            (episode_filename, extension_UNUSED) = util.filename_from_url(url)
            fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)

        # Use the video title for YouTube downloads
        for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
            if self.url.startswith(yt_url):
                fn_template = os.path.basename(self.title)

        # If the basename is empty, use the md5 hexdigest of the URL
        if len(fn_template) == 0 or fn_template.startswith('redirect.'):
            log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
            fn_template = urldigest

        # Find a unique filename for this episode
        wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)

        # We populate the filename field the first time - does the old file still exist?
        if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
            log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
            self.filename = urldigest+ext

        # The old file exists, but we have decided to want a different filename
        if self.filename is not None and wanted_filename != self.filename:
            # there might be an old download folder crawling around - move it!
            new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
            old_file_name = os.path.join(self.channel.save_dir, self.filename)
            if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
                log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
                os.rename(old_file_name, new_file_name)
            elif force_update and not os.path.exists(old_file_name):
                # When we call force_update, the file might not yet exist when we
                # call it from the downloading code before saving the file
                log('Choosing new filename: %s', new_file_name, sender=self)
            else:
                log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
            log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
        elif self.filename is None:
            log('Setting filename to "%s".', wanted_filename, sender=self)
        else:
            log('Should update filename. Stays the same (%s). Good!', \
                    wanted_filename, sender=self)
        self.filename = wanted_filename
        self.save()
        self.db.commit()

    return os.path.join(self.channel.save_dir, self.filename)
def set_mimetype(self, mimetype, commit=False):
    """Sets the mimetype for this episode"""
    # NOTE(review): 'commit' is accepted but never consulted in this extract;
    # presumably a database-commit step existed here -- confirm against the
    # full source before relying on commit=True having any effect.
    self.mimetype = mimetype
def extension(self, may_call_local_filename=True):
    """Return the file extension for this episode (e.g. '.mp3').

    The extension is taken from the episode URL; if a local filename is
    available (and may_call_local_filename is True), its extension takes
    precedence.  When no usable extension can be detected, fall back to
    deriving one from the stored mimetype.
    """
    filename, ext = util.filename_from_url(self.url)
    if may_call_local_filename:
        filename = self.local_filename(create=False)
        if filename is not None:
            filename, ext = os.path.splitext(filename)
    # if we can't detect the extension from the url fallback on the mimetype
    if ext == '' or util.file_type_by_extension(ext) is None:
        ext = util.extension_from_mimetype(self.mimetype)
    # Bug fix: the computed extension was never returned, so callers always
    # received None.
    return ext
def check_is_new(self, downloading=lambda e: False):
    """
    Returns True if this episode is to be considered new.
    "Downloading" should be a callback that gets an episode
    as its parameter and returns True if the episode is
    being downloaded at the moment.
    """
    # Guard-clause form of the original single boolean expression:
    # new == (normal state) and (not played) and (not being downloaded).
    if self.state != gpodder.STATE_NORMAL:
        return False
    if self.is_played:
        return False
    return not downloading(self)
# NOTE(review): the enclosing method header is missing from this extract --
# from the statements below this is presumably something like
# ``def mark_new(self):``; confirm against the full source.
# Reset the episode to the normal/"new" state, clear the played flag and
# persist both via the database layer.
self.state = gpodder.STATE_NORMAL
self.is_played = False
self.db.mark_episode(self.url, state=self.state, is_played=self.is_played)
# NOTE(review): the enclosing method header is missing from this extract --
# presumably a "mark as played/old" method; confirm against the full source.
# Set the played flag and persist it via the database layer.
self.is_played = True
self.db.mark_episode(self.url, is_played=True)
def file_exists(self):
    """Return True if the episode's downloaded file exists on disk."""
    filename = self.local_filename(create=False, check_only=True)
    if filename is None:
        # Bug fix: this branch was empty, so execution fell through and
        # os.path.exists(None) would raise TypeError.  No filename means
        # there is nothing on disk.
        return False
    return os.path.exists(filename)
def was_downloaded(self, and_exists=False):
    """Return True if this episode is in the downloaded state.

    With and_exists=True, additionally require that the downloaded file
    is still present on disk.
    """
    # Bug fix: both guard branches were empty and the function never
    # returned a value; restore the obvious early-return logic implied by
    # the two conditions.
    if self.state != gpodder.STATE_DOWNLOADED:
        return False
    if and_exists and not self.file_exists():
        return False
    return True
def sync_filename(self, use_custom=False, custom_format=None):
    # Build the filename used when syncing this episode to a device by
    # expanding custom_format with this episode and its podcast channel.
    # NOTE(review): 'use_custom' is accepted but never consulted here, and
    # custom_format defaults to None (which is passed straight through to
    # the formatter) -- presumably an 'if use_custom:' guard with a fallback
    # value was lost; confirm against the full source.
    return util.object_string_formatter(custom_format,
            episode=self, podcast=self.channel)
def file_type(self):
    """Classify this episode by its file extension (delegates to util)."""
    detected_ext = self.extension()
    return util.file_type_by_extension(detected_ext)
def basename(self):
    """Return the last path component of the episode URL, extension stripped."""
    url_tail = os.path.basename(self.url)
    stem, _ext = os.path.splitext(url_tail)
    return stem
def published(self):
    """
    Returns published date as YYYYMMDD (or 00000000 if not available)
    """
    try:
        return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
    except (TypeError, ValueError, OverflowError, OSError):
        # Bug fix: the log call was unreachable (it followed an
        # unconditional return) and the '00000000' fallback promised by the
        # docstring was missing; restore the error path.
        log('Cannot format pubDate for "%s".', self.title, sender=self)
        return '00000000'
# NOTE(review): the ``def`` line for this method is missing from this extract
# (a time-formatting sibling of published()) -- confirm against full source.
"""
Returns published time as HHMM (or 0000 if not available)
"""
return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
# NOTE(review): unreachable after the return above -- presumably this belongs
# in an except-branch (with a '0000' fallback) that was lost in extraction.
log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
def cute_pubdate(self):
    """Return a human-readable publication date string.

    Falls back to a translated '(unknown)' when the date cannot be
    formatted.
    """
    result = util.format_date(self.pubDate)
    if result is None:
        # Bug fix: 'result' was computed and then unconditionally discarded
        # (the function always returned '(unknown)'); only fall back when
        # formatting actually failed.
        return '(%s)' % _('unknown')
    return result
# Read-only property alias so the formatted publication date can be read as
# an attribute instead of calling cute_pubdate().
pubdate_prop = property(fget=cute_pubdate)
def calculate_filesize(self):
    """Update self.length with the on-disk size of the downloaded file."""
    filename = self.local_filename(create=False)
    if filename is None:
        log('calculate_filesized called, but filename is None!', sender=self)
    # Bug fix: os.path.getsize() was called unconditionally, so a missing
    # file (or filename None, per the log line above) raised instead of
    # being logged, and the final log call was unreachable.  Keep this
    # best-effort: failures only log.
    try:
        self.length = os.path.getsize(filename)
    except (OSError, TypeError):
        log('Could not get filesize for %s.', self.url)
def get_filesize_string(self):
    """Return self.length formatted as a human-readable filesize string."""
    size_in_bytes = self.length
    return util.format_filesize(size_in_bytes)
# Read-only property alias exposing the formatted filesize as an attribute.
filesize_prop = property(fget=get_filesize_string)
def get_played_string(self):
    """Return the translated 'Unplayed' label, or '' once played."""
    if not self.is_played:
        return _('Unplayed')
    # Bug fix: the played case fell off the end of the function and
    # returned None; returning '' keeps the result a string in all cases.
    return ''
# Read-only property alias exposing the played-status label as an attribute.
played_prop = property(fget=get_played_string)
1116 def is_duplicate( self
, episode
):
1117 if self
.title
== episode
.title
and self
.pubDate
== episode
.pubDate
:
1118 log('Possible duplicate detected: %s', self
.title
)