From d4ea5b78b20bd5737d9a1db5616531f479df57e3 Mon Sep 17 00:00:00 2001 From: Nick Date: Wed, 2 Jul 2008 23:36:39 +0000 Subject: [PATCH] Tue, 01 Jul 2008 22:46:52 -0400 Better file extension detection using feed items' mimetypes * src/gpodder/gui.py: Use episode.file_type() instead of util.file_type_by_extension(util.file_extension_from_url(url)) * src/gpodder/libgpodder.py: Use episode.file_type() instead of util.file_type_by_extension(util.file_extension_from_url(url)) * src/gpodder/libpodcasts.py: Add new column to the channel list gtk.ListStore for holding the file extension. Add extension() function to podcastItem which makes use of the mimetype in the event that filename_from_url()[1] returns None * src/gpodder/util.py: file_extension_from_url is renamed to filename_from_url and returns (filename, extension) Added extension_from_mimetype(mimetype) git-svn-id: svn://svn.berlios.de/gpodder/trunk@752 b0d088ad-0a06-0410-aad2-9ed5178a7e87 --- ChangeLog | 15 +++++++++++++++ src/gpodder/gui.py | 2 +- src/gpodder/libgpodder.py | 2 +- src/gpodder/libpodcasts.py | 32 ++++++++++++++++++++++--------- src/gpodder/util.py | 48 +++++++++++++++++++--------------------------- 5 files changed, 60 insertions(+), 39 deletions(-) diff --git a/ChangeLog b/ChangeLog index ad392630..113e23e2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +Tue, 01 Jul 2008 22:46:52 -0400 +Better file extension detection using feed items' mimetypes + + * src/gpodder/gui.py: Use episode.file_type() instead of + util.file_type_by_extension(util.file_extension_from_url(url)) + * src/gpodder/libgpodder.py: Use episode.file_type() instead of + util.file_type_by_extension(util.file_extension_from_url(url)) + * src/gpodder/libpodcasts.py: Add new column to the channel list + gtk.ListStore for holding the file extension.
Add extension() + function to podcastItem which makes use of the mimetype in the event that + filename_from_url()[1] returns None + * src/gpodder/util.py: file_extension_from_url is renamed to + filename_from_url and returns (filename, extension) + Added extension_from_mimetype(mimetype) + Mon, 30 Jun 2008 17:39:36 +0200 Do not update podcast list when closing channel window (patch by Justin Forest) diff --git a/src/gpodder/gui.py b/src/gpodder/gui.py index e89bb917..dfbb87b3 100644 --- a/src/gpodder/gui.py +++ b/src/gpodder/gui.py @@ -1056,7 +1056,7 @@ class gPodder(GladeWidget): else: can_download = True - if util.file_type_by_extension(util.file_extension_from_url(url)) == 'torrent': + if self.active_channel.find_episode(url).file_type() == 'torrent': can_download = can_download or gl.config.use_gnome_bittorrent can_download = can_download and not can_cancel diff --git a/src/gpodder/libgpodder.py b/src/gpodder/libgpodder.py index a5d8fe02..b329e226 100644 --- a/src/gpodder/libgpodder.py +++ b/src/gpodder/libgpodder.py @@ -321,7 +321,7 @@ class gPodderLib(object): return (True, service) # Determine the file type and set the player accordingly.
- file_type = util.file_type_by_extension(util.file_extension_from_url(episode.url)) + file_type = episode.file_type() if file_type == 'video': player = self.config.videoplayer diff --git a/src/gpodder/libpodcasts.py b/src/gpodder/libpodcasts.py index 243e92d0..b6f059e5 100644 --- a/src/gpodder/libpodcasts.py +++ b/src/gpodder/libpodcasts.py @@ -338,7 +338,7 @@ class podcastChannel(object): if missing: log('Episode missing: %s (before drawing an icon)', episode.url, sender=self) - file_type = util.file_type_by_extension( util.file_extension_from_url(url)) + file_type = util.file_type_by_extension( model.get_value( iter, 9)) if file_type == 'audio': status_icon = util.get_tree_icon(ICON_AUDIO_FILE, not episode.is_played, episode.is_locked, not episode.file_exists(), self.icon_cache, icon_size) elif file_type == 'video': @@ -359,7 +359,9 @@ class podcastChannel(object): """ Return a gtk.ListStore containing episodes for this channel """ - new_model = gtk.ListStore( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_BOOLEAN, gtk.gdk.Pixbuf, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING) + new_model = gtk.ListStore( gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING, + gobject.TYPE_BOOLEAN, gtk.gdk.Pixbuf, gobject.TYPE_STRING, gobject.TYPE_STRING, + gobject.TYPE_STRING, gobject.TYPE_STRING, gobject.TYPE_STRING ) for item in self.get_all_episodes(): if gl.config.episode_list_descriptions: @@ -372,7 +374,9 @@ class podcastChannel(object): else: filelength = None - new_iter = new_model.append((item.url, item.title, filelength, True, None, item.cute_pubdate(), description, item.description, item.local_filename())) + new_iter = new_model.append((item.url, item.title, filelength, + True, None, item.cute_pubdate(), description, item.description, + item.local_filename(), item.extension())) self.iter_set_downloading_columns( new_model, new_iter) self.update_save_dir_size() @@ -456,7 +460,9 @@ class 
podcastItem(object): break episode.url = util.normalize_feed_url( enclosure.get( 'href', '')) elif hasattr(entry, 'link'): - extension = util.file_extension_from_url(entry.link) + (filename, extension) = util.filename_from_url(entry.link) + if extension == '' and hasattr( entry, 'type'): + extension = util.extension_from_mimetype(e.type) file_type = util.file_type_by_extension(extension) if file_type is not None: log('Adding episode with link to file type "%s".', file_type, sender=episode) @@ -558,8 +564,8 @@ class podcastItem(object): log('Cannot delete episode from disk: %s', self.title, traceback=True, sender=self) def local_filename( self): - ext = util.file_extension_from_url(self.url) - + ext = self.extension() + # For compatibility with already-downloaded episodes, # we accept md5 filenames if they are downloaded now. md5_filename = os.path.join(self.channel.save_dir, md5.new(self.url).hexdigest()+ext) @@ -567,8 +573,8 @@ class podcastItem(object): return md5_filename # If the md5 filename does not exist, - episode = util.file_extension_from_url(self.url, complete_filename=True) - episode = util.sanitize_filename(episode) + ( episode, e ) = util.filename_from_url(self.url) + episode = util.sanitize_filename(episode) + ext # If the episode filename looks suspicious, # we still return the md5 filename to be on @@ -578,6 +584,14 @@ class podcastItem(object): filename = os.path.join(self.channel.save_dir, episode) return filename + def extension( self): + ( filename, ext ) = util.filename_from_url(self.url) + # if we can't detect the extension from the url fallback on the mimetype + if ext == '' or util.file_type_by_extension(ext) is None: + ext = util.extension_from_mimetype(self.mimetype) + log('Getting extension from mimetype for: %s (mimetype: %s)' % (self.title, ext), sender=self) + return ext + def mark_new(self): self.state = db.STATE_NORMAL self.is_played = False @@ -604,7 +618,7 @@ class podcastItem(object): return self.title def file_type( self): - 
return util.file_type_by_extension( util.file_extension_from_url( self.url)) + return util.file_type_by_extension( self.extension() ) @property def basename( self): diff --git a/src/gpodder/util.py b/src/gpodder/util.py index 45f09267..990ceb11 100644 --- a/src/gpodder/util.py +++ b/src/gpodder/util.py @@ -54,6 +54,7 @@ import urllib import urllib2 import httplib import webbrowser +import mimetypes import feedparser @@ -415,12 +416,17 @@ def torrent_filename( filename): except: return None +def extension_from_mimetype(mimetype): + """ + Simply guesses what the file extension should be from the mimetype + """ + return mimetypes.guess_extension(mimetype) or '' -def file_extension_from_url(url, complete_filename=False): +def filename_from_url(url): """ - Extracts the (lowercase) file name extension (with dot) + Extracts the filename and (lowercase) extension (with dot) from a URL, e.g. http://server.com/file.MP3?download=yes - will result in the string ".mp3" being returned. + will result in the string ("file", ".mp3") being returned. This function will also try to best-guess the "real" extension for a media file (audio, video, torrent) by @@ -428,43 +434,29 @@ def file_extension_from_url(url, complete_filename=False): into the query string to find better matches, if the original extension does not resolve to a known type. - If the optional parameter "complete_filename" is set to - True, this will not return the extension, but the - complete filename (basename) of the found media file. 
- - http://my.net/redirect.php?my.net/file.ogg => ".ogg" - http://server/get.jsp?file=/episode0815.MOV => ".mov" - http://s/redirect.mp4?http://serv2/test.mp4 => ".mp4" + http://my.net/redirect.php?my.net/file.ogg => ("file", ".ogg") + http://server/get.jsp?file=/episode0815.MOV => ("episode0815", ".mov") + http://s/redirect.mp4?http://serv2/test.mp4 => ("test", ".mp4") """ (scheme, netloc, path, para, query, fragid) = urlparse.urlparse(url) - filename = os.path.basename( urllib.unquote(path)) - (tmp, extension) = os.path.splitext(filename) + (filename, extension) = os.path.splitext(os.path.basename( urllib.unquote(path))) if file_type_by_extension(extension) is not None and not \ query.startswith(scheme+'://'): # We have found a valid extension (audio, video, torrent) # and the query string doesn't look like a URL - if complete_filename: - return filename - else: - return extension.lower() - + return ( filename, extension.lower() ) + # If the query string looks like a possible URL, try that first if len(query.strip()) > 0 and query.find('/') != -1: query_url = '://'.join((scheme, urllib.unquote(query))) - query_extension = file_extension_from_url(query_url) + (query_filename, query_extension) = filename_from_url(query_url) if file_type_by_extension(query_extension) is not None: - if complete_filename: - return os.path.basename(query_url) - else: - return query_extension - - # No exact match found, simply return the original extension - if complete_filename: - return filename - else: - return extension.lower() + return os.path.splitext(os.path.basename(query_url)) + + # No exact match found, simply return the original filename & extension + return ( filename, extension.lower() ) def file_type_by_extension( extension): -- 2.11.4.GIT