Add 2010 to the years in copyright notice
[gpodder.git] / src / gpodder / model.py
blobd77584f52832f5682b96a366a3b189b8a2b28bba
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 # gpodder.model - Core model classes for gPodder (2009-08-13)
23 # Based on libpodcasts.py (thp, 2005-10-29)
26 import gpodder
27 from gpodder import util
28 from gpodder import feedcore
29 from gpodder import youtube
30 from gpodder import corestats
32 from gpodder.liblogger import log
34 import os
35 import re
36 import glob
37 import shutil
38 import urllib
39 import urlparse
40 import time
41 import datetime
42 import rfc822
43 import hashlib
44 import feedparser
45 import xml.sax.saxutils
47 _ = gpodder.gettext
class CustomFeed(feedcore.ExceptionWithData): pass  # Control-flow exception: raised by fetch_channel() when a registered custom handler takes over; ".data" carries the handler's feed object
class gPodderFetcher(feedcore.Fetcher):
    """
    Feed fetcher used by PodcastChannel objects.

    Extends the feedcore Fetcher with the gPodder User-Agent and with a
    registry of custom per-URL handlers; fetch_channel() is the
    convenience entry point.
    """
    # Handlers registered via register(); each gets a chance to serve a URL
    custom_handlers = []

    def __init__(self):
        feedcore.Fetcher.__init__(self, gpodder.user_agent)

    def fetch_channel(self, channel):
        """Fetch the channel's feed (conditional GET via etag/last-modified)."""
        etag = channel.etag
        modified = feedparser._parse_date(channel.last_modified)
        # If we have a username or password, rebuild the URL with them
        # included.  (A HTTPBasicAuthHandler would need to know the realm
        # up front, so embedding the credentials is the simpler approach.)
        url = channel.authenticate_url(channel.url)
        # Let every registered custom handler have a go first; a handler
        # that returns a feed short-circuits via the CustomFeed exception.
        for handler in self.custom_handlers:
            result = handler.handle_url(url)
            if result is not None:
                raise CustomFeed(result)
        self.fetch(url, etag, modified)

    def _resolve_url(self, url):
        # Map YouTube user pages etc. to their real feed URL
        return youtube.get_real_channel_url(url)

    @classmethod
    def register(cls, handler):
        """Add a custom URL handler consulted by fetch_channel()."""
        cls.custom_handlers.append(handler)

#    def _get_handlers(self):
#        # Add a ProxyHandler for fetching data via a proxy server
#        proxies = {'http': 'http://proxy.example.org:8080'}
#        return [urllib2.ProxyHandler(proxies)]
# The "register" method is exposed here for external usage
# (external code calls model.register_custom_handler(handler) to add a
# handler that fetch_channel() will consult for every URL)
register_custom_handler = gPodderFetcher.register
class PodcastModelObject(object):
    """
    A generic base class for our podcast model providing common helper
    and utility functions.
    """

    @classmethod
    def create_from_dict(cls, d, *args):
        """
        Create a new object, passing "args" to the constructor
        and then updating the object with the values from "d".
        """
        o = cls(*args)
        o.update_from_dict(d)
        return o

    def update_from_dict(self, d):
        """
        Updates the attributes of this object with values from the
        dictionary "d" by using the keys found in "d".
        """
        for k in d:
            # Only overwrite attributes that already exist on the object;
            # unknown keys in "d" are silently ignored.
            if hasattr(self, k):
                setattr(self, k, d[k])
class PodcastChannel(PodcastModelObject):
    """holds data for a complete channel"""
    # Maximum length of the on-disk folder name for a channel
    MAX_FOLDERNAME_LENGTH = 150

    # Shared fetcher instance used by all channels
    feed_fetcher = gPodderFetcher()

    @classmethod
    def build_factory(cls, download_dir):
        # Returns a factory callable suitable for db.load_channels()
        def factory(dict, db):
            return cls.create_from_dict(dict, db, download_dir)
        return factory

    @classmethod
    def load_from_db(cls, db, download_dir):
        # Load every subscribed channel from the database
        return db.load_channels(factory=cls.build_factory(download_dir))

    @classmethod
    def load(cls, db, url, create=True, authentication_tokens=None,\
            max_episodes=0, download_dir=None, allow_empty_feeds=False):
        # Load a single channel by URL; if "create" is True and the URL is
        # not subscribed yet, fetch the feed and create a new channel.
        if isinstance(url, unicode):
            url = url.encode('utf-8')

        tmp = db.load_channels(factory=cls.build_factory(download_dir), url=url)
        if len(tmp):
            return tmp[0]
        elif create:
            tmp = PodcastChannel(db, download_dir)
            tmp.url = url
            if authentication_tokens is not None:
                tmp.username = authentication_tokens[0]
                tmp.password = authentication_tokens[1]

            tmp.update(max_episodes)
            tmp.save()
            db.force_last_new(tmp)
            # Subscribing to empty feeds should yield an error (except if
            # the user specifically allows empty feeds in the config UI)
            if sum(tmp.get_statistics()) == 0 and not allow_empty_feeds:
                tmp.delete()
                raise Exception(_('No downloadable episodes in feed'))
            return tmp

    def episode_factory(self, d, db__parameter_is_unused=None):
        """
        This function takes a dictionary containing key-value pairs for
        episodes and returns a new PodcastEpisode object that is connected
        to this PodcastChannel object.

        Returns: A new PodcastEpisode object
        """
        return PodcastEpisode.create_from_dict(d, self)

    def _consume_custom_feed(self, custom_feed, max_episodes=0):
        # Update channel metadata from a custom (non-feedparser) feed object
        self.title = custom_feed.get_title()
        self.link = custom_feed.get_link()
        self.description = custom_feed.get_description()
        self.image = custom_feed.get_image()
        self.pubDate = time.time()
        self.save()

        # The custom feed adds episodes itself; we only pass the known GUIDs
        guids = [episode.guid for episode in self.get_all_episodes()]
        self.count_new += custom_feed.get_new_episodes(self, guids)
        self.save()

        self.db.purge(max_episodes, self.id)

    def _consume_updated_feed(self, feed, max_episodes=0):
        # Merge a freshly-parsed feedparser result into this channel
        self.parse_error = feed.get('bozo_exception', None)

        self.title = feed.feed.get('title', self.url)
        self.link = feed.feed.get('link', self.link)
        self.description = feed.feed.get('subtitle', self.description)
        # Start YouTube-specific title FIX
        YOUTUBE_PREFIX = 'Uploads by '
        if self.title.startswith(YOUTUBE_PREFIX):
            self.title = self.title[len(YOUTUBE_PREFIX):] + ' on YouTube'
        # End YouTube-specific title FIX

        try:
            # NOTE(review): "None+(0,)" is evaluated eagerly as the default
            # argument and always raises TypeError, so this whole branch
            # falls through to the except clause and pubDate becomes "now".
            # Probably intended: feed.feed.get('updated_parsed', None)+(0,)
            # -- confirm against upstream before changing.
            self.pubDate = rfc822.mktime_tz(feed.feed.get('updated_parsed', None+(0,)))
        except:
            self.pubDate = time.time()

        if hasattr(feed.feed, 'image'):
            if hasattr(feed.feed.image, 'href') and feed.feed.image.href:
                old = self.image
                self.image = feed.feed.image.href

        self.save()

        # Load all episodes to update them properly.
        existing = self.get_all_episodes()

        # We can limit the maximum number of entries that gPodder will parse
        if max_episodes > 0 and len(feed.entries) > max_episodes:
            entries = feed.entries[:max_episodes]
        else:
            entries = feed.entries

        # Search all entries for new episodes
        for entry in entries:
            episode = None

            try:
                episode = PodcastEpisode.from_feedparser_entry(entry, self)
            except Exception, e:
                log('Cannot instantiate episode "%s": %s. Skipping.', entry.get('id', '(no id available)'), e, sender=self, traceback=True)

            if episode:
                self.count_new += 1

                for ex in existing:
                    # Known episode (same GUID or detected duplicate):
                    # update it in-place instead of adding a new row
                    if ex.guid == episode.guid or episode.is_duplicate(ex):
                        for k in ('title', 'url', 'description', 'link', 'pubDate', 'guid'):
                            setattr(ex, k, getattr(episode, k))
                        self.count_new -= 1
                        episode = ex

                episode.save()

        # Remove "unreachable" episodes - episodes that have not been
        # downloaded and that the feed does not list as downloadable anymore
        if self.id is not None:
            seen_guids = set(e.guid for e in feed.entries if hasattr(e, 'guid'))
            episodes_to_purge = (e for e in existing if \
                    e.state != gpodder.STATE_DOWNLOADED and \
                    e.guid not in seen_guids and e.guid is not None)
            for episode in episodes_to_purge:
                log('Episode removed from feed: %s (%s)', episode.title, \
                        episode.guid, sender=self)
                self.db.delete_episode_by_guid(episode.guid, self.id)

        # This *might* cause episodes to be skipped if there were more than
        # max_episodes_per_feed items added to the feed between updates.
        # The benefit is that it prevents old episodes from appearing as new
        # in certain situations (see bug #340).
        self.db.purge(max_episodes, self.id)

    def update_channel_lock(self):
        self.db.update_channel_lock(self)

    def _update_etag_modified(self, feed):
        # Remember HTTP caching headers for conditional GETs on next update
        self.updated_timestamp = time.time()
        self.calculate_publish_behaviour()
        self.etag = feed.headers.get('etag', self.etag)
        self.last_modified = feed.headers.get('last-modified', self.last_modified)

    def query_automatic_update(self):
        """Query if this channel should be updated automatically

        Returns True if the update should happen in automatic
        mode or False if this channel should be skipped (timeout
        not yet reached or release not expected right now).
        """
        updated = self.updated_timestamp
        expected = self.release_expected

        now = time.time()
        one_day_ago = now - 60*60*24
        lastcheck = now - 60*10

        # Update if the last check is older than a day, or if a new release
        # is expected and we haven't checked in the last ten minutes
        return updated < one_day_ago or \
                (expected < now and updated < lastcheck)

    def update(self, max_episodes=0):
        # Fetch the feed; feedcore communicates its result via exceptions
        try:
            self.feed_fetcher.fetch_channel(self)
        except CustomFeed, updated:
            custom_feed = updated.data
            self._consume_custom_feed(custom_feed, max_episodes)
            self.save()
        except feedcore.UpdatedFeed, updated:
            feed = updated.data
            self._consume_updated_feed(feed, max_episodes)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NewLocation, updated:
            # Feed moved permanently -- remember the new URL
            feed = updated.data
            self.url = feed.href
            self._consume_updated_feed(feed, max_episodes)
            self._update_etag_modified(feed)
            self.save()
        except feedcore.NotModified, updated:
            feed = updated.data
            self._update_etag_modified(feed)
            self.save()
        except Exception, e:
            # "Not really" errors
            #feedcore.AuthenticationRequired
            # Temporary errors
            #feedcore.Offline
            #feedcore.BadRequest
            #feedcore.InternalServerError
            #feedcore.WifiLogin
            # Permanent errors
            #feedcore.Unsubscribe
            #feedcore.NotFound
            #feedcore.InvalidFeed
            #feedcore.UnknownStatusCode
            raise

        self.db.commit()

    def delete(self, purge=True):
        self.db.delete_channel(self, purge)

    def save(self):
        self.db.save_channel(self)

    def get_statistics(self):
        # Returns a 5-tuple of per-state episode counts for this channel;
        # NOTE(review): exact ordering is defined by db.get_channel_count.
        if self.id is None:
            return (0, 0, 0, 0, 0)
        else:
            return self.db.get_channel_count(int(self.id))

    def authenticate_url(self, url):
        # Embed stored username/password into the URL (if any)
        return util.url_add_authentication(url, self.username, self.password)

    def __init__(self, db, download_dir):
        self.db = db
        self.download_dir = download_dir
        self.id = None
        self.url = None
        self.title = ''
        self.link = ''
        self.description = ''
        self.image = None
        self.pubDate = 0
        self.parse_error = None
        self.newest_pubdate_cached = None
        self.foldername = None
        self.auto_foldername = 1 # automatically generated foldername

        # should this channel be synced to devices? (ex: iPod)
        self.sync_to_devices = True
        # to which playlist should be synced
        self.device_playlist_name = 'gPodder'
        # if set, this overrides the channel-provided title
        self.override_title = ''
        self.username = ''
        self.password = ''

        # HTTP caching headers from the last successful fetch
        self.last_modified = None
        self.etag = None

        self.save_dir_size = 0
        self.__save_dir_size_set = False

        self.count_downloaded = 0
        self.count_new = 0
        self.count_unplayed = 0

        self.channel_is_locked = False

        # Publish-behaviour estimation (see calculate_publish_behaviour)
        self.release_expected = time.time()
        self.release_deviation = 0
        self.updated_timestamp = 0

    def calculate_publish_behaviour(self):
        # Estimate when the next episode is expected, based on the
        # publication intervals of the most recent (up to 30) episodes
        episodes = self.db.load_episodes(self, factory=self.episode_factory, limit=30)
        if len(episodes) < 3:
            return

        deltas = []
        latest = max(e.pubDate for e in episodes)
        for index in range(len(episodes)-1):
            if episodes[index].pubDate != 0 and episodes[index+1].pubDate != 0:
                deltas.append(episodes[index].pubDate - episodes[index+1].pubDate)

        if len(deltas) > 1:
            stats = corestats.Stats(deltas)
            self.release_expected = min([latest+stats.stdev(), latest+(stats.min()+stats.avg())*.5])
            self.release_deviation = stats.stdev()
        else:
            self.release_expected = latest
            self.release_deviation = 0

    def request_save_dir_size(self):
        # Lazily compute the download-folder size only once per session
        if not self.__save_dir_size_set:
            self.update_save_dir_size()
            self.__save_dir_size_set = True

    def update_save_dir_size(self):
        self.save_dir_size = util.calculate_size(self.save_dir)

    def get_title( self):
        # Precedence: user override > feed-provided title > URL fallback
        if self.override_title:
            return self.override_title
        elif not self.__title.strip():
            return self.url
        else:
            return self.__title

    def set_title( self, value):
        self.__title = value.strip()

    title = property(fget=get_title,
                     fset=set_title)

    def set_custom_title( self, custom_title):
        custom_title = custom_title.strip()

        # if the custom title is the same as we have
        if custom_title == self.override_title:
            return

        # if custom title is the same as channel title and we didn't have a custom title
        if custom_title == self.__title and self.override_title == '':
            return

        # make sure self.foldername is initialized
        self.get_save_dir()

        # rename folder if custom_title looks sane
        new_folder_name = self.find_unique_folder_name(custom_title)
        if len(new_folder_name) > 0 and new_folder_name != self.foldername:
            log('Changing foldername based on custom title: %s', custom_title, sender=self)
            new_folder = os.path.join(self.download_dir, new_folder_name)
            old_folder = os.path.join(self.download_dir, self.foldername)
            if os.path.exists(old_folder):
                if not os.path.exists(new_folder):
                    # Old folder exists, new folder does not -> simply rename
                    log('Renaming %s => %s', old_folder, new_folder, sender=self)
                    os.rename(old_folder, new_folder)
                else:
                    # Both folders exist -> move files and delete old folder
                    log('Moving files from %s to %s', old_folder, new_folder, sender=self)
                    for file in glob.glob(os.path.join(old_folder, '*')):
                        shutil.move(file, new_folder)
                    log('Removing %s', old_folder, sender=self)
                    shutil.rmtree(old_folder, ignore_errors=True)
            self.foldername = new_folder_name
            self.save()

        # An override identical to the feed title is stored as "no override"
        if custom_title != self.__title:
            self.override_title = custom_title
        else:
            self.override_title = ''

    def get_downloaded_episodes(self):
        return self.db.load_episodes(self, factory=self.episode_factory, state=gpodder.STATE_DOWNLOADED)

    def get_new_episodes(self, downloading=lambda e: False):
        """
        Get a list of new episodes. You can optionally specify
        "downloading" as a callback that takes an episode as
        a parameter and returns True if the episode is currently
        being downloaded or False if not.

        By default, "downloading" is implemented so that it
        reports all episodes as not downloading.
        """
        return [episode for episode in self.db.load_episodes(self, \
                factory=self.episode_factory) if \
                episode.check_is_new(downloading=downloading)]

    def get_playlist_filename(self):
        # If the save_dir doesn't end with a slash (which it really should
        # not, if the implementation is correct, we can just append .m3u :)
        assert self.save_dir[-1] != '/'
        return self.save_dir+'.m3u'

    def update_m3u_playlist(self):
        # (Re)write the channel's M3U playlist of downloaded episodes
        m3u_filename = self.get_playlist_filename()

        downloaded_episodes = self.get_downloaded_episodes()
        if not downloaded_episodes:
            log('No episodes - removing %s', m3u_filename, sender=self)
            util.delete_file(m3u_filename)
            return

        log('Writing playlist to %s', m3u_filename, sender=self)
        f = open(m3u_filename, 'w')
        f.write('#EXTM3U\n')

        # Sort downloaded episodes by publication date, ascending
        def older(episode_a, episode_b):
            return cmp(episode_a.pubDate, episode_b.pubDate)

        for episode in sorted(downloaded_episodes, cmp=older):
            if episode.was_downloaded(and_exists=True):
                filename = episode.local_filename(create=False)
                assert filename is not None

                # Write a path relative to the playlist location if possible
                if os.path.dirname(filename).startswith(os.path.dirname(m3u_filename)):
                    filename = filename[len(os.path.dirname(m3u_filename)+os.sep):]
                f.write('#EXTINF:0,'+self.title+' - '+episode.title+' ('+episode.cute_pubdate()+')\n')
                f.write(filename+'\n')

        f.close()

    def addDownloadedItem(self, item):
        log('addDownloadedItem(%s)', item.url)

        if not item.was_downloaded():
            item.mark_downloaded(save=True)
            self.update_m3u_playlist()

    def get_all_episodes(self):
        return self.db.load_episodes(self, factory=self.episode_factory)

    def find_unique_folder_name(self, foldername):
        # Remove trailing dots to avoid errors on Windows (bug 600)
        foldername = foldername.strip().rstrip('.')

        current_try = util.sanitize_filename(foldername, \
                self.MAX_FOLDERNAME_LENGTH)
        next_try_id = 2

        while True:
            # Free stale DB entries whose folders no longer exist on disk
            if not os.path.exists(os.path.join(self.download_dir, current_try)):
                self.db.remove_foldername_if_deleted_channel(current_try)

            # Append " (2)", " (3)", ... until the name is unused
            if self.db.channel_foldername_exists(current_try):
                current_try = '%s (%d)' % (foldername, next_try_id)
                next_try_id += 1
            else:
                return current_try

    def get_save_dir(self):
        urldigest = hashlib.md5(self.url).hexdigest()
        sanitizedurl = util.sanitize_filename(self.url, self.MAX_FOLDERNAME_LENGTH)
        if self.foldername is None or (self.auto_foldername and (self.foldername == urldigest or self.foldername.startswith(sanitizedurl))):
            # we must change the folder name, because it has not been set manually
            fn_template = util.sanitize_filename(self.title, self.MAX_FOLDERNAME_LENGTH)

            # if this is an empty string, try the basename
            if len(fn_template) == 0:
                log('That is one ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = util.sanitize_filename(os.path.basename(self.url), self.MAX_FOLDERNAME_LENGTH)

            # If the basename is also empty, use the first 6 md5 hexdigest chars of the URL
            if len(fn_template) == 0:
                log('That is one REALLY ugly feed you have here! (Report this to bugs.gpodder.org: %s)', self.url, sender=self)
                fn_template = urldigest # no need for sanitize_filename here

            # Find a unique folder name for this podcast
            wanted_foldername = self.find_unique_folder_name(fn_template)

            # if the foldername has not been set, check if the (old) md5 filename exists
            if self.foldername is None and os.path.exists(os.path.join(self.download_dir, urldigest)):
                log('Found pre-0.15.0 download folder for %s: %s', self.title, urldigest, sender=self)
                self.foldername = urldigest

            # we have a valid, new folder name in "current_try" -> use that!
            if self.foldername is not None and wanted_foldername != self.foldername:
                # there might be an old download folder crawling around - move it!
                new_folder_name = os.path.join(self.download_dir, wanted_foldername)
                old_folder_name = os.path.join(self.download_dir, self.foldername)
                if os.path.exists(old_folder_name):
                    if not os.path.exists(new_folder_name):
                        # Old folder exists, new folder does not -> simply rename
                        log('Renaming %s => %s', old_folder_name, new_folder_name, sender=self)
                        os.rename(old_folder_name, new_folder_name)
                    else:
                        # Both folders exist -> move files and delete old folder
                        log('Moving files from %s to %s', old_folder_name, new_folder_name, sender=self)
                        for file in glob.glob(os.path.join(old_folder_name, '*')):
                            shutil.move(file, new_folder_name)
                        log('Removing %s', old_folder_name, sender=self)
                        shutil.rmtree(old_folder_name, ignore_errors=True)
            log('Updating foldername of %s to "%s".', self.url, wanted_foldername, sender=self)
            self.foldername = wanted_foldername
            self.save()

        save_dir = os.path.join(self.download_dir, self.foldername)

        # Create save_dir if it does not yet exist
        if not util.make_directory( save_dir):
            log( 'Could not create save_dir: %s', save_dir, sender = self)

        return save_dir

    save_dir = property(fget=get_save_dir)

    def remove_downloaded( self):
        # Delete the whole download folder (ignore_errors=True)
        shutil.rmtree( self.save_dir, True)

    @property
    def cover_file(self):
        # Migrate old cover filenames ('cover', '.cover') to 'folder.jpg'
        new_name = os.path.join(self.save_dir, 'folder.jpg')
        if not os.path.exists(new_name):
            old_names = ('cover', '.cover')
            for old_name in old_names:
                filename = os.path.join(self.save_dir, old_name)
                if os.path.exists(filename):
                    shutil.move(filename, new_name)
                    return new_name

        return new_name

    def delete_episode_by_url(self, url):
        episode = self.db.load_episode(url, factory=self.episode_factory)

        if episode is not None:
            filename = episode.local_filename(create=False)
            if filename is not None:
                util.delete_file(filename)
            else:
                log('Cannot delete episode: %s (I have no filename!)', episode.title, sender=self)
            episode.set_state(gpodder.STATE_DELETED)

        self.update_m3u_playlist()
622 class PodcastEpisode(PodcastModelObject):
623 """holds data for one object in a channel"""
624 MAX_FILENAME_LENGTH = 200
    def reload_from_db(self):
        """
        Re-reads all episode details for this object from the
        database and updates this object accordingly. Can be
        used to refresh existing objects when the database has
        been updated (e.g. the filename has been set after a
        download where it was not set before the download)
        """
        d = self.db.load_episode(self.url)
        if d is not None:
            self.update_from_dict(d)

        # Returned for convenient chaining; "self" is updated in-place
        return self
640 def has_website_link(self):
641 return bool(self.link) and (self.link != self.url)
    @staticmethod
    def from_feedparser_entry( entry, channel):
        """Create a new PodcastEpisode from a feedparser entry.

        Returns the new episode object, or None if the entry carries
        no downloadable enclosure, media content or usable link.
        """
        episode = PodcastEpisode( channel)

        # Fall back to the first line of the summary if no title is given
        episode.title = entry.get( 'title', util.get_first_line( util.remove_html_tags( entry.get( 'summary', ''))))
        episode.link = entry.get( 'link', '')
        episode.description = ''

        # Get the episode description (prefer summary, then subtitle)
        for key in ('summary', 'subtitle', 'link'):
            if key in entry:
                episode.description = entry[key]
            if episode.description:
                break

        episode.guid = entry.get( 'id', '')
        if entry.get( 'updated_parsed', None):
            # Convert feedparser's 9-tuple into a Unix timestamp
            # (the appended 0 is the timezone offset for mktime_tz)
            episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

        if episode.title == '':
            log( 'Warning: Episode has no title, adding anyways.. (Feed Is Buggy!)', sender = episode)

        enclosure = None
        if hasattr(entry, 'enclosures') and len(entry.enclosures) > 0:
            enclosure = entry.enclosures[0]
            if len(entry.enclosures) > 1:
                # Prefer an audio/video enclosure with a usable URL
                for e in entry.enclosures:
                    if hasattr( e, 'href') and hasattr( e, 'length') and hasattr( e, 'type') and (e.type.startswith('audio/') or e.type.startswith('video/')):
                        if util.normalize_feed_url(e.href) is not None:
                            log( 'Selected enclosure: %s', e.href, sender = episode)
                            enclosure = e
                            break
            episode.url = util.normalize_feed_url( enclosure.get( 'href', ''))
        elif hasattr(entry, 'media_content'):
            # Media RSS: pick the first usable audio/video media item
            media = getattr(entry, 'media_content')
            for m in media:
                if 'url' in m and 'type' in m and (m['type'].startswith('audio/') or m['type'].startswith('video/')):
                    if util.normalize_feed_url(m['url']) is not None:
                        log('Selected media_content: %s', m['url'], sender = episode)
                        episode.url=util.normalize_feed_url(m['url'])
                        episode.mimetype=m['type']
                        if 'fileSize' in m:
                            episode.length=int(m['fileSize'])
                        break
        elif hasattr(entry, 'links'):
            for link in entry.links:
                if not hasattr(link, 'href'):
                    continue

                # YouTube-specific workaround
                if youtube.is_video_link(link.href):
                    episode.url = link.href
                    break

                # Check if we can resolve this link to a audio/video file
                filename, extension = util.filename_from_url(link.href)
                file_type = util.file_type_by_extension(extension)
                if file_type is None and hasattr(link, 'type'):
                    extension = util.extension_from_mimetype(link.type)
                    file_type = util.file_type_by_extension(extension)

                # The link points to a audio or video file - use it!
                if file_type is not None:
                    log('Adding episode with link to file type "%s".', \
                            file_type, sender=episode)
                    episode.url = link.href
                    break

        # Still no luck finding an episode? Try to forcefully scan the
        # HTML/plaintext contents of the entry for MP3 links
        if not episode.url:
            mp3s = re.compile(r'http://[^"]*\.mp3')
            for content in entry.get('content', []):
                html = content.value
                for match in mp3s.finditer(html):
                    episode.url = match.group(0)
                    break
                if episode.url:
                    break

        if not episode.url:
            # This item in the feed has no downloadable enclosure
            return None

        metainfo = None
        if not episode.pubDate:
            # No date in the feed -- query the server for the episode URL
            metainfo = util.get_episode_info_from_url(episode.url)
            if 'pubdate' in metainfo:
                try:
                    episode.pubDate = int(float(metainfo['pubdate']))
                except:
                    log('Cannot convert pubDate "%s" in from_feedparser_entry.', str(metainfo['pubdate']), traceback=True)

        if hasattr(enclosure, 'length'):
            try:
                episode.length = int(enclosure.length)
                if episode.length == 0:
                    raise ValueError('Zero-length is not acceptable')
            except ValueError, ve:
                log('Invalid episode length: %s (%s)', enclosure.length, ve.message)
                # -1 marks "unknown size" (0 would look like "no enclosure")
                episode.length = -1

        if hasattr( enclosure, 'type'):
            episode.mimetype = enclosure.type

        if episode.title == '':
            # Last resort: use the media file's basename as the title
            ( filename, extension ) = os.path.splitext( os.path.basename( episode.url))
            episode.title = filename

        return episode
    def __init__(self, channel):
        # Database wrapper shared with the owning channel
        self.db = channel.db
        # Used by Storage for faster saving
        self.id = None
        self.url = ''
        self.title = ''
        # Enclosure size in bytes (0 = unknown; set from the feed)
        self.length = 0
        self.mimetype = 'application/octet-stream'
        self.guid = ''
        self.description = ''
        self.link = ''
        # Back-reference to the owning PodcastChannel
        self.channel = channel
        # Publication time as a Unix timestamp (0 = unknown)
        self.pubDate = 0
        self.filename = None
        self.auto_filename = 1 # automatically generated filename

        self.state = gpodder.STATE_NORMAL
        self.is_played = False
        # New episodes inherit the channel-wide lock setting
        self.is_locked = channel.channel_is_locked
775 def save(self):
776 if self.state != gpodder.STATE_DOWNLOADED and self.file_exists():
777 self.state = gpodder.STATE_DOWNLOADED
778 self.db.save_episode(self)
780 def set_state(self, state):
781 self.state = state
782 self.db.mark_episode(self.url, state=self.state, is_played=self.is_played, is_locked=self.is_locked)
784 def mark(self, state=None, is_played=None, is_locked=None):
785 if state is not None:
786 self.state = state
787 if is_played is not None:
788 self.is_played = is_played
789 if is_locked is not None:
790 self.is_locked = is_locked
791 self.db.mark_episode(self.url, state=state, is_played=is_played, is_locked=is_locked)
793 def mark_downloaded(self, save=False):
794 self.state = gpodder.STATE_DOWNLOADED
795 self.is_played = False
796 if save:
797 self.save()
798 self.db.commit()
800 @property
801 def title_markup(self):
802 return '%s\n<small>%s</small>' % (xml.sax.saxutils.escape(self.title),
803 xml.sax.saxutils.escape(self.channel.title))
805 @property
806 def maemo_markup(self):
807 return ('<b>%s</b>\n<small>%s; '+_('released %s')+ \
808 '; '+_('from %s')+'</small>') % (\
809 xml.sax.saxutils.escape(self.title), \
810 xml.sax.saxutils.escape(self.filesize_prop), \
811 xml.sax.saxutils.escape(self.pubdate_prop), \
812 xml.sax.saxutils.escape(self.channel.title))
814 @property
815 def maemo_remove_markup(self):
816 if self.is_played:
817 played_string = _('played')
818 else:
819 played_string = _('unplayed')
820 downloaded_string = self.get_age_string()
821 if not downloaded_string:
822 downloaded_string = _('today')
823 return ('<b>%s</b>\n<small>%s; %s; '+_('downloaded %s')+ \
824 '; '+_('from %s')+'</small>') % (\
825 xml.sax.saxutils.escape(self.title), \
826 xml.sax.saxutils.escape(self.filesize_prop), \
827 xml.sax.saxutils.escape(played_string), \
828 xml.sax.saxutils.escape(downloaded_string), \
829 xml.sax.saxutils.escape(self.channel.title))
831 def age_in_days(self):
832 return util.file_age_in_days(self.local_filename(create=False, \
833 check_only=True))
835 def get_age_string(self):
836 return util.file_age_to_string(self.age_in_days())
838 age_prop = property(fget=get_age_string)
840 def one_line_description( self):
841 lines = util.remove_html_tags(self.description).strip().splitlines()
842 if not lines or lines[0] == '':
843 return _('No description available')
844 else:
845 return ' '.join(lines)
847 def delete_from_disk(self):
848 try:
849 self.channel.delete_episode_by_url(self.url)
850 except:
851 log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self)
    def find_unique_file_name(self, url, filename, extension):
        """Return a sanitized filename for this episode that does not
        collide with any other episode filename in the database."""
        current_try = util.sanitize_filename(filename, self.MAX_FILENAME_LENGTH)+extension
        next_try_id = 2
        # NOTE(review): lookup_url is assigned but never used in this method
        lookup_url = None

        if self.filename == current_try and current_try is not None:
            # We already have this filename - good!
            return current_try

        while self.db.episode_filename_exists(current_try):
            if next_try_id == 2 and not youtube.is_video_link(url):
                # If we arrive here, current_try has a collision, so
                # try to resolve the URL for a better basename
                log('Filename collision: %s - trying to resolve...', current_try, sender=self)
                url = util.get_real_url(self.channel.authenticate_url(url))
                episode_filename, extension_UNUSED = util.filename_from_url(url)
                current_try = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)+extension
                if not self.db.episode_filename_exists(current_try) and current_try:
                    log('Filename %s is available - collision resolved.', current_try, sender=self)
                    return current_try
                else:
                    filename = episode_filename
                    log('Continuing search with %s as basename...', filename, sender=self)

            # Append " (2)", " (3)", ... until a free name is found
            current_try = '%s (%d)%s' % (filename, next_try_id, extension)
            next_try_id += 1

        return current_try
882 def local_filename(self, create, force_update=False, check_only=False,
883 template=None):
884 """Get (and possibly generate) the local saving filename
886 Pass create=True if you want this function to generate a
887 new filename if none exists. You only want to do this when
888 planning to create/download the file after calling this function.
890 Normally, you should pass create=False. This will only
891 create a filename when the file already exists from a previous
892 version of gPodder (where we used md5 filenames). If the file
893 does not exist (and the filename also does not exist), this
894 function will return None.
896 If you pass force_update=True to this function, it will try to
897 find a new (better) filename and move the current file if this
898 is the case. This is useful if (during the download) you get
899 more information about the file, e.g. the mimetype and you want
900 to include this information in the file name generation process.
902 If check_only=True is passed to this function, it will never try
903 to rename the file, even if would be a good idea. Use this if you
904 only want to check if a file exists.
906 If "template" is specified, it should be a filename that is to
907 be used as a template for generating the "real" filename.
909 The generated filename is stored in the database for future access.
911 ext = self.extension(may_call_local_filename=False).encode('utf-8', 'ignore')
913 # For compatibility with already-downloaded episodes, we
914 # have to know md5 filenames if they are downloaded already
915 urldigest = hashlib.md5(self.url).hexdigest()
917 if not create and self.filename is None:
918 urldigest_filename = os.path.join(self.channel.save_dir, urldigest+ext)
919 if os.path.exists(urldigest_filename):
920 # The file exists, so set it up in our database
921 log('Recovering pre-0.15.0 file: %s', urldigest_filename, sender=self)
922 self.filename = urldigest+ext
923 self.auto_filename = 1
924 self.save()
925 return urldigest_filename
926 return None
928 # We only want to check if the file exists, so don't try to
929 # rename the file, even if it would be reasonable. See also:
930 # http://bugs.gpodder.org/attachment.cgi?id=236
931 if check_only:
932 if self.filename is None:
933 return None
934 else:
935 return os.path.join(self.channel.save_dir, self.filename)
937 if self.filename is None or force_update or (self.auto_filename and self.filename == urldigest+ext):
938 # Try to find a new filename for the current file
939 if template is not None:
940 # If template is specified, trust the template's extension
941 episode_filename, ext = os.path.splitext(template)
942 else:
943 episode_filename, extension_UNUSED = util.filename_from_url(self.url)
944 fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)
946 if 'redirect' in fn_template and template is None:
947 # This looks like a redirection URL - force URL resolving!
948 log('Looks like a redirection to me: %s', self.url, sender=self)
949 url = util.get_real_url(self.channel.authenticate_url(self.url))
950 log('Redirection resolved to: %s', url, sender=self)
951 (episode_filename, extension_UNUSED) = util.filename_from_url(url)
952 fn_template = util.sanitize_filename(episode_filename, self.MAX_FILENAME_LENGTH)
954 # Use the video title for YouTube downloads
955 for yt_url in ('http://youtube.com/', 'http://www.youtube.com/'):
956 if self.url.startswith(yt_url):
957 fn_template = os.path.basename(self.title)
959 # If the basename is empty, use the md5 hexdigest of the URL
960 if len(fn_template) == 0 or fn_template.startswith('redirect.'):
961 log('Report to bugs.gpodder.org: Podcast at %s with episode URL: %s', self.channel.url, self.url, sender=self)
962 fn_template = urldigest
964 # Find a unique filename for this episode
965 wanted_filename = self.find_unique_file_name(self.url, fn_template, ext)
967 # We populate the filename field the first time - does the old file still exist?
968 if self.filename is None and os.path.exists(os.path.join(self.channel.save_dir, urldigest+ext)):
969 log('Found pre-0.15.0 downloaded file: %s', urldigest, sender=self)
970 self.filename = urldigest+ext
972 # The old file exists, but we have decided to want a different filename
973 if self.filename is not None and wanted_filename != self.filename:
974 # there might be an old download folder crawling around - move it!
975 new_file_name = os.path.join(self.channel.save_dir, wanted_filename)
976 old_file_name = os.path.join(self.channel.save_dir, self.filename)
977 if os.path.exists(old_file_name) and not os.path.exists(new_file_name):
978 log('Renaming %s => %s', old_file_name, new_file_name, sender=self)
979 os.rename(old_file_name, new_file_name)
980 elif force_update and not os.path.exists(old_file_name):
981 # When we call force_update, the file might not yet exist when we
982 # call it from the downloading code before saving the file
983 log('Choosing new filename: %s', new_file_name, sender=self)
984 else:
985 log('Warning: %s exists or %s does not.', new_file_name, old_file_name, sender=self)
986 log('Updating filename of %s to "%s".', self.url, wanted_filename, sender=self)
987 elif self.filename is None:
988 log('Setting filename to "%s".', wanted_filename, sender=self)
989 else:
990 log('Should update filename. Stays the same (%s). Good!', \
991 wanted_filename, sender=self)
992 self.filename = wanted_filename
993 self.save()
994 self.db.commit()
996 return os.path.join(self.channel.save_dir, self.filename)
998 def set_mimetype(self, mimetype, commit=False):
999 """Sets the mimetype for this episode"""
1000 self.mimetype = mimetype
1001 if commit:
1002 self.db.commit()
1004 def extension(self, may_call_local_filename=True):
1005 filename, ext = util.filename_from_url(self.url)
1006 if may_call_local_filename:
1007 filename = self.local_filename(create=False)
1008 if filename is not None:
1009 filename, ext = os.path.splitext(filename)
1010 # if we can't detect the extension from the url fallback on the mimetype
1011 if ext == '' or util.file_type_by_extension(ext) is None:
1012 ext = util.extension_from_mimetype(self.mimetype)
1013 return ext
1015 def check_is_new(self, downloading=lambda e: False):
1017 Returns True if this episode is to be considered new.
1018 "Downloading" should be a callback that gets an episode
1019 as its parameter and returns True if the episode is
1020 being downloaded at the moment.
1022 return self.state == gpodder.STATE_NORMAL and \
1023 not self.is_played and \
1024 not downloading(self)
1026 def mark_new(self):
1027 self.state = gpodder.STATE_NORMAL
1028 self.is_played = False
1029 self.db.mark_episode(self.url, state=self.state, is_played=self.is_played)
1031 def mark_old(self):
1032 self.is_played = True
1033 self.db.mark_episode(self.url, is_played=True)
1035 def file_exists(self):
1036 filename = self.local_filename(create=False, check_only=True)
1037 if filename is None:
1038 return False
1039 else:
1040 return os.path.exists(filename)
1042 def was_downloaded(self, and_exists=False):
1043 if self.state != gpodder.STATE_DOWNLOADED:
1044 return False
1045 if and_exists and not self.file_exists():
1046 return False
1047 return True
1049 def sync_filename(self, use_custom=False, custom_format=None):
1050 if use_custom:
1051 return util.object_string_formatter(custom_format,
1052 episode=self, podcast=self.channel)
1053 else:
1054 return self.title
1056 def file_type( self):
1057 return util.file_type_by_extension( self.extension() )
1059 @property
1060 def basename( self):
1061 return os.path.splitext( os.path.basename( self.url))[0]
1063 @property
1064 def published( self):
1066 Returns published date as YYYYMMDD (or 00000000 if not available)
1068 try:
1069 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%Y%m%d')
1070 except:
1071 log( 'Cannot format pubDate for "%s".', self.title, sender = self)
1072 return '00000000'
1074 @property
1075 def pubtime(self):
1077 Returns published time as HHMM (or 0000 if not available)
1079 try:
1080 return datetime.datetime.fromtimestamp(self.pubDate).strftime('%H%M')
1081 except:
1082 log('Cannot format pubDate (time) for "%s".', self.title, sender=self)
1083 return '0000'
1085 def cute_pubdate(self):
1086 result = util.format_date(self.pubDate)
1087 if result is None:
1088 return '(%s)' % _('unknown')
1089 else:
1090 return result
1092 pubdate_prop = property(fget=cute_pubdate)
1094 def calculate_filesize( self):
1095 filename = self.local_filename(create=False)
1096 if filename is None:
1097 log('calculate_filesized called, but filename is None!', sender=self)
1098 try:
1099 self.length = os.path.getsize(filename)
1100 except:
1101 log( 'Could not get filesize for %s.', self.url)
1103 def get_filesize_string(self):
1104 return util.format_filesize(self.length)
1106 filesize_prop = property(fget=get_filesize_string)
1108 def get_played_string( self):
1109 if not self.is_played:
1110 return _('Unplayed')
1112 return ''
1114 played_prop = property(fget=get_played_string)
1116 def is_duplicate( self, episode ):
1117 if self.title == episode.title and self.pubDate == episode.pubDate:
1118 log('Possible duplicate detected: %s', self.title)
1119 return True
1120 return False