1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (C) 2005-2007 Thomas Perl <thp at perli.net>
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 # util.py -- Misc utility functions
22 # Thomas Perl <thp@perli.net> 2007-08-04
25 """Miscellaneous helper functions for gPodder
27 This module provides helper and utility functions for gPodder that
28 are not tied to any specific part of gPodder.
33 from gpodder
.liblogger
import log
45 from htmlentitydefs
import entitydefs
61 import xml
.dom
.minidom
def make_directory(path):
    """
    Tries to create a directory if it does not exist already.
    Returns True if the directory exists after the function
    call, False otherwise.
    """
    if os.path.isdir(path):
        return True

    try:
        os.makedirs(path)
    except OSError:
        # Another process may have created the directory between the
        # isdir() check and makedirs(); that still counts as success.
        if os.path.isdir(path):
            return True
        log('Could not create directory: %s', path)
        return False

    return True
def normalize_feed_url(url):
    """
    Converts any URL to http:// or ftp:// so that it can be
    used with "wget". If the URL cannot be converted (invalid
    or unknown scheme), "None" is returned.

    This will also normalize feed:// and itpc:// to http://
    Also supported are phobos.apple.com links (iTunes podcast)
    and itms:// links (iTunes podcast direct link).
    """
    if not url:
        return None

    # Pasted URLs frequently carry stray whitespace or a newline
    url = url.strip()
    if len(url) < 8:
        return None

    if url.startswith('itms://'):
        url = parse_itunes_xml(url)
        if url is None:
            # No feed URL in the iTunes document; do not hand None
            # to itunes_discover_rss(), which calls url.lower()
            return None

    # Links to "phobos.apple.com"
    url = itunes_discover_rss(url)
    if url is None:
        return None

    for scheme in ('http://', 'https://', 'ftp://'):
        if url.startswith(scheme):
            return url

    # "feed://" and "itpc://" are both seven characters long
    if url.startswith('feed://') or url.startswith('itpc://'):
        return 'http://' + url[7:]

    return None
def username_password_from_url(url):
    """
    Returns a tuple (username,password) containing authentication
    data from the specified URL or (None,None) if no authentication
    data can be found in the URL.

    Both values are URL-unquoted. If the URL only carries a user
    name ("http://user@host/"), the password is None.
    """
    (username, password) = (None, None)

    # Only the network location part can carry "user:password@"
    netloc = urlparse.urlparse(url)[1]

    if '@' in netloc:
        # rsplit: the password itself may contain an "@" character
        authentication = netloc.rsplit('@', 1)[0]
        if ':' in authentication:
            (username, password) = authentication.split(':', 1)
            username = urllib.unquote(username)
            password = urllib.unquote(password)
        else:
            username = urllib.unquote(authentication)

    return (username, password)
def directory_is_writable(path):
    """
    Returns True if the specified directory exists and is writable
    by the current user.
    """
    # Creating entries inside a directory needs write *and* search
    # (execute) permission, so both bits are checked.
    return os.path.isdir(path) and os.access(path, os.W_OK | os.X_OK)
def calculate_size(path):
    """
    Tries to calculate the size of a directory, including any
    subdirectories found. The returned value might not be
    correct if the user doesn't have appropriate permissions
    to list all subdirectories of the given path.

    Returns the size in bytes; zero for None, for paths that do
    not exist and for entries located directly below "/".
    """
    if path is None:
        return 0

    # Never walk a top-level tree such as "/home" or "/usr"
    if os.path.dirname(path) == '/':
        return 0

    if os.path.isfile(path):
        return os.path.getsize(path)

    # Symlinked directories are skipped to avoid cycles
    if os.path.isdir(path) and not os.path.islink(path):
        total = os.path.getsize(path)

        try:
            items = os.listdir(path)
        except OSError:
            log('Cannot access: %s', path)
            return total

        for item in items:
            try:
                total += calculate_size(os.path.join(path, item))
            except OSError:
                # Entry vanished or is unreadable; count what we can
                log('Cannot get size for %s', item)

        return total

    return 0
def file_modification_datetime(filename):
    """
    Returns the modification date of the specified file
    as a datetime.datetime object or None if the modification
    date cannot be determined.
    """
    if filename is None:
        return None

    if not os.access(filename, os.R_OK):
        return None

    try:
        s = os.stat(filename)
        timestamp = s[stat.ST_MTIME]
        return datetime.datetime.fromtimestamp(timestamp)
    except (OSError, ValueError, OverflowError):
        # stat() can fail if the file vanished after the access()
        # check; fromtimestamp() rejects out-of-range timestamps
        log('Cannot get modification timestamp for %s', filename)
        return None
def file_age_in_days(filename):
    """
    Returns the age of the specified filename in days or
    zero if the modification date cannot be determined.
    """
    dt = file_modification_datetime(filename)
    if dt is None:
        return 0

    # A modification time in the future (clock skew, restored
    # backups) would give a negative age; report zero instead
    return max(0, (datetime.datetime.now() - dt).days)
def file_age_to_string(days):
    """
    Converts a "number of days" value to a string that
    can be used in the UI to display the file age.

    >>> file_age_to_string(0)
    ''
    >>> file_age_to_string(1)
    'one day ago'
    >>> file_age_to_string(2)
    '2 days ago'
    """
    if days == 1:
        return _('one day ago')

    if days > 1:
        return _('%d days ago') % days

    # Zero (today or unknown) and negative values have no label
    return ''
def get_free_disk_space(path):
    """
    Calculates the free disk space available to the current user
    on the file system that contains the given path.

    If the path (or its parent folder) does not yet exist, this
    function returns zero. Zero is also returned when the free
    space cannot be queried on this platform.
    """
    # The free space is looked up for the folder containing "path"
    path = os.path.dirname(path)
    if not os.path.exists(path):
        return 0

    # os.statvfs() only exists on Unix-like platforms
    if not hasattr(os, 'statvfs'):
        return 0

    try:
        s = os.statvfs(path)
    except OSError:
        return 0

    # Blocks available to unprivileged users times the block size
    return s.f_bavail * s.f_bsize
def format_filesize(bytesize, use_si_units=False, digits=2):
    """
    Formats the given size in bytes to be human-readable,
    e.g. 1536 becomes "1.50 KiB".

    With use_si_units=True, powers of 1000 (kB, MB, GB) are used
    instead of powers of 1024 (KiB, MiB, GiB). "digits" is the
    number of decimal places in the result.

    Returns a localized "(unknown)" string when the bytesize
    has a negative value or cannot be converted to a number.
    """
    si_units = (
        ('kB', 10**3),
        ('MB', 10**6),
        ('GB', 10**9),
    )

    binary_units = (
        ('KiB', 2**10),
        ('MiB', 2**20),
        ('GiB', 2**30),
    )

    try:
        bytesize = float(bytesize)
    except (TypeError, ValueError):
        return _('(unknown)')

    if bytesize < 0:
        return _('(unknown)')

    if use_si_units:
        units = si_units
    else:
        units = binary_units

    (used_unit, used_value) = ('B', bytesize)

    # Units are sorted ascending, so the last one that fits wins
    for (unit, value) in units:
        if bytesize >= value:
            used_value = bytesize / float(value)
            used_unit = unit

    return ('%.' + str(digits) + 'f %s') % (used_value, used_unit)
def delete_file(path):
    """
    Tries to delete the given filename and silently
    ignores deletion errors (if the file doesn't exist).
    Also deletes extracted cover files if they exist.
    """
    log('Trying to delete: %s', path)

    try:
        os.unlink(path)
    except OSError:
        # Best effort: the file may already be gone
        pass

    # Remove any extracted cover art that might exist. This is done
    # per file so that a missing main file or one undeletable cover
    # does not keep the remaining covers from being removed.
    for cover_file in glob.glob('%s.cover.*' % (path, )):
        try:
            os.unlink(cover_file)
        except OSError:
            pass
def remove_html_tags(html):
    """
    Remove HTML tags from a string and replace numeric and
    named entities with the corresponding character, so the
    HTML text can be displayed in a simple text view.

    Entities that are not known are left untouched. The result
    has leading and trailing whitespace removed.
    """
    # If we would want more speed, we could make these global
    re_strip_tags = re.compile('<[^>]*>')
    # Decimal ("&#8364;"), hexadecimal ("&#x20AC;") and named
    # ("&euro;") entities. Names are restricted to alphanumerics so
    # that plain text such as "Tom & Jerry; ..." is never matched.
    re_entities = re.compile(
        r'&(#\d{1,7}|#[xX][0-9a-fA-F]{1,6}|[A-Za-z][A-Za-z0-9]{1,7});')

    def replace_entity(match):
        entity = match.group(1)
        try:
            if entity[:2].lower() == '#x':
                return unichr(int(entity[2:], 16))
            if entity[0] == '#':
                return unichr(int(entity[1:]))

            value = entitydefs.get(entity)
            if value is None:
                # Unknown name: keep the text instead of deleting it
                return match.group(0)
            if value.startswith('&#'):
                # entitydefs stores characters outside of Latin-1
                # as a numeric character reference
                return unichr(int(value[2:-1]))
            return unicode(value, 'iso-8859-1')
        except (ValueError, OverflowError):
            # Code point out of range for this Python build
            return match.group(0)

    # Remove all HTML/XML tags from the string
    result = re_strip_tags.sub('', html)

    # Convert numeric and named entities to their unicode character
    result = re_entities.sub(replace_entity, result)

    return result.strip()
def torrent_filename(filename):
    """
    Checks if a file is a ".torrent" file by examining its
    contents and searching for the file name of the file
    to be downloaded.

    Returns the name of the file the ".torrent" will download
    or None if no filename is found (the file is no ".torrent")
    """
    if not os.path.exists(filename):
        return None

    try:
        torrent = open(filename)
        try:
            header = torrent.readline()
        finally:
            # Always release the file handle again
            torrent.close()

        # Raises ValueError if this is not torrent metadata
        header.index('6:pieces')

        # Bencoded string: "4:name" + "<length>:<file name>"
        name_length_pos = header.index('4:name') + 6
        colon_pos = header.find(':', name_length_pos)
        name_length = int(header[name_length_pos:colon_pos]) + 1
        return header[(colon_pos + 1):(colon_pos + name_length)]
    except (IOError, OSError, ValueError):
        # Unreadable file or no torrent markers found
        return None
def file_extension_from_url(url):
    """
    Extracts the (lowercase) file name extension (with dot)
    from a URL, e.g. http://server.com/file.MP3?download=yes
    will result in the string ".mp3" being returned.

    This function will also try to best-guess the "real"
    extension for a media file (audio, video, torrent) by
    trying to match an extension to these types and recurse
    into the query string to find better matches, if the
    original extension does not resolve to a known type.

    http://my.net/redirect.php?my.net/file.ogg => ".ogg"
    http://server/get.jsp?file=/episode0815.MOV => ".mov"
    """
    (scheme, netloc, path, para, query, fragid) = urlparse.urlparse(url)
    filename = os.path.basename(urllib.unquote(path))
    extension = os.path.splitext(filename)[1].lower()

    if file_type_by_extension(extension) is not None:
        # We have found a valid extension (audio, video, torrent)
        return extension

    # If the query string looks like a possible URL, try that first
    if query.strip() and '/' in query:
        query_url = '://'.join((scheme, urllib.unquote(query)))
        query_extension = file_extension_from_url(query_url)

        if file_type_by_extension(query_extension) is not None:
            return query_extension

    # No exact match found, simply return the original extension
    return extension
def file_type_by_extension(extension):
    """
    Tries to guess the file type by looking up the filename
    extension from a table of known file types. Will return
    the type as string ("audio", "video" or "torrent") or
    None if the file type cannot be determined.

    The extension may be given with or without the leading dot
    and in any case.
    """
    types = {
        'audio': ('mp3', 'ogg', 'wav', 'wma', 'aac', 'm4a'),
        'video': ('mp4', 'avi', 'mpg', 'mpeg', 'm4v', 'mov', 'divx',
                  'flv', 'wmv', '3gp'),
        'torrent': ('torrent',),
    }

    # None and the empty string cannot be looked up
    if not extension:
        return None

    if extension[0] == '.':
        extension = extension[1:]

    extension = extension.lower()

    for (file_type, extensions) in types.items():
        if extension in extensions:
            return file_type

    return None
def get_tree_icon(icon_name, add_bullet=False, add_padlock=False, icon_cache=None, icon_size=32):
    """
    Loads an icon from the current icon theme at the specified
    size, suitable for display in a gtk.TreeView.

    Optionally adds a green bullet (the GTK Stock "Yes" icon)
    to the Pixbuf returned. Also, a padlock icon can be added.

    If an icon_cache parameter is supplied, it has to be a
    dictionary and will be used to store generated icons.

    On subsequent calls, icons will be loaded from cache if
    the cache is supplied again and the icon is found in
    the cache.
    """
    cache_key = (icon_name, add_bullet, add_padlock, icon_size)

    if icon_cache is not None and cache_key in icon_cache:
        return icon_cache[cache_key]

    icon_theme = gtk.icon_theme_get_default()

    try:
        icon = icon_theme.load_icon(icon_name, icon_size, 0)
    except Exception:
        log('(get_tree_icon) Warning: Cannot load icon with name "%s", will use default icon.', icon_name)
        try:
            icon = icon_theme.load_icon(gtk.STOCK_DIALOG_QUESTION, icon_size, 0)
        except Exception:
            # Not even the fallback icon exists in this theme
            icon = None

    if icon and (add_bullet or add_padlock):
        # We'll modify the icon, so use .copy()
        if add_bullet:
            try:
                icon = icon.copy()
                # Bullet goes into the lower right corner
                emblem = icon_theme.load_icon(gtk.STOCK_YES, int(float(icon_size)*1.2/3.0), 0)
                size = emblem.get_width()
                pos = icon.get_width() - size
                emblem.composite(icon, pos, pos, size, size, pos, pos, 1, 1, gtk.gdk.INTERP_BILINEAR, 255)
            except Exception:
                log('(get_tree_icon) Error adding emblem to icon "%s".', icon_name)
        if add_padlock:
            try:
                icon = icon.copy()
                # Padlock goes into the upper left corner
                emblem = icon_theme.load_icon('emblem-nowrite', int(float(icon_size)/2.0), 0)
                size = emblem.get_width()
                emblem.composite(icon, 0, 0, size, size, 0, 0, 1, 1, gtk.gdk.INTERP_BILINEAR, 255)
            except Exception:
                log('(get_tree_icon) Error adding emblem to icon "%s".', icon_name)

    if icon_cache is not None:
        icon_cache[cache_key] = icon

    return icon
def get_first_line(s):
    """
    Returns only the first line of a string, stripped so
    that it doesn't have whitespace before or after.
    """
    # maxsplit=1: only the first line is needed, so do not
    # split the whole text into a list of lines
    return s.strip().split('\n', 1)[0].strip()
def updated_parsed_to_rfc2822(updated_parsed):
    """
    Converts a 9-tuple from feedparser's updated_parsed
    field to a C-locale string suitable for further use.

    The LC_TIME locale is switched to "C" while formatting, so
    that day and month names are always in English, and is put
    back afterwards.
    """
    # setlocale() without a new value returns the current setting
    # as the exact string that can be passed back to restore it
    old_locale = locale.setlocale(locale.LC_TIME)
    locale.setlocale(locale.LC_TIME, 'C')
    try:
        result = time.strftime('%a, %d %b %Y %H:%M:%S GMT', updated_parsed)
    finally:
        # Restore the user's locale even if strftime() fails
        locale.setlocale(locale.LC_TIME, old_locale)

    return result
def object_string_formatter(s, **kwargs):
    """
    Makes attributes of object passed in as keyword
    arguments available as {OBJECTNAME.ATTRNAME} in
    the passed-in string and returns a string with
    the above arguments replaced with the attribute
    values of the corresponding object.

    Placeholders for attributes that do not exist are left
    untouched.

    Example:

    e = Episode()
    e.title = 'Hello'
    s = '{episode.title} World'

    print object_string_formatter(s, episode=e)
          => 'Hello World'
    """
    result = s

    for (key, o) in kwargs.items():
        # re.escape: the keyword is literal text, not a pattern
        matches = re.findall(r'\{%s\.([^\}]+)\}' % re.escape(key), s)
        for attr in matches:
            if hasattr(o, attr):
                try:
                    from_s = '{%s.%s}' % (key, attr)
                    # Format the value so that numbers and other
                    # non-string attributes can be inserted as well
                    to_s = '%s' % (getattr(o, attr), )
                    result = result.replace(from_s, to_s)
                except Exception:
                    log('Could not replace attribute "%s" in string "%s".', attr, s)

    return result
def format_desktop_command(command, filename):
    """
    Formats a command template from the "Exec=" line of a .desktop
    file to a string that can be invoked in a shell.

    Handled format strings: %U, %u, %F, %f and a fallback that
    appends the filename as first parameter of the command.

    See http://standards.freedesktop.org/desktop-entry-spec/1.0/ar01s06.html
    """
    # A tuple (not a dict) keeps the order in which the format
    # strings are tried the same on every run
    items = (
        ('%U', 'file://%s' % filename),
        ('%u', 'file://%s' % filename),
        ('%F', filename),
        ('%f', filename),
    )

    # NOTE(review): the filename is inserted without shell quoting;
    # callers passing untrusted names to a shell should confirm this.
    for (key, value) in items:
        if command.find(key) >= 0:
            return command.replace(key, value)

    return '%s "%s"' % (command, filename)
def find_command(command):
    """
    Searches the system's PATH for a specific command that is
    executable by the user. Returns the first occurrence of an
    executable binary in the PATH, or None if the command is
    not available.
    """
    search_path = os.environ.get('PATH')
    if search_path is None:
        return None

    for path in search_path.split(os.pathsep):
        command_file = os.path.join(path, command)
        if os.path.isfile(command_file) and os.access(command_file, os.X_OK):
            return command_file

    return None
def parse_itunes_xml(url):
    """
    Parses an XML document in the "url" parameter (this has to be
    a itms:// or http:// URL to a XML doc) and searches all "<dict>"
    elements for the first occurrence of a "<key>feedURL</key>"
    element and then continues the search for the string value of
    this key.

    This returns the RSS feed URL for Apple iTunes Podcast XML
    documents that are retrieved by itunes_discover_rss().
    Returns None if the document contains no feed URL.
    """
    url = url.replace('itms://', 'http://')
    doc = http_get_and_gunzip(url)
    d = xml.dom.minidom.parseString(doc)

    try:
        for pairs in d.getElementsByTagName('dict'):
            # Keys and values are siblings inside one <dict>, so a
            # key must not carry over into the next <dict>
            last_key = None

            for node in pairs.childNodes:
                if node.nodeType != node.ELEMENT_NODE:
                    continue

                if node.tagName == 'key' and node.childNodes.length > 0:
                    if node.firstChild.nodeType == node.TEXT_NODE:
                        last_key = node.firstChild.data

                if last_key != 'feedURL':
                    continue

                if node.tagName == 'string' and node.childNodes.length > 0:
                    if node.firstChild.nodeType == node.TEXT_NODE:
                        return node.firstChild.data

        return None
    finally:
        # Break the DOM's reference cycles so it is freed right away
        d.unlink()
def http_get_and_gunzip(uri):
    """
    Does a HTTP GET request and tells the server that we accept
    gzip-encoded data. This is necessary, because the Apple iTunes
    server will always return gzip-encoded data, regardless of what
    we really request.

    Returns the uncompressed document at the given URI.
    """
    request = urllib2.Request(uri)
    request.add_header("Accept-encoding", "gzip")
    usock = urllib2.urlopen(request)

    try:
        data = usock.read()
        is_gzipped = (usock.headers.get('content-encoding', None) == 'gzip')
    finally:
        # Do not leave the connection open until garbage collection
        usock.close()

    if is_gzipped:
        data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()

    return data
def itunes_discover_rss(url):
    """
    Takes an iTunes-specific podcast URL and turns it
    into a "normal" RSS feed URL. If the given URL is
    not a phobos.apple.com URL, we will simply return
    the URL and assume it's already an RSS feed URL.

    Returns None if the feed URL cannot be determined.

    Idea from Andrew Clarke's itunes-url-decoder.py
    """
    if 'phobos.apple.com' not in url.lower():
        # This doesn't look like an iTunes URL
        return url

    try:
        data = http_get_and_gunzip(url)
        # The page opens the real XML document via itmsOpen('...')
        match = re.search(r"itmsOpen\('([^']*)", data)
        if match is None:
            return None
        return parse_itunes_xml(match.group(1))
    except Exception:
        # Network and parse errors both mean "no feed found"
        log('Cannot discover RSS feed for iTunes URL: %s', url)
        return None
def idle_add(func, *args):
    """
    This is a wrapper function that does the Right
    Thing depending on if we are running a GTK+ GUI or
    not. If not, we're simply calling the function.

    If we are a GUI app, we use gobject.idle_add() to
    call the function later - this is needed for
    threads to be able to modify GTK+ widget data.
    """
    if gpodder.interface_is_gui:
        import gobject

        def run_once(f, *a):
            f(*a)
            # An idle callback that returns a true value is called
            # again and again; always returning False makes sure
            # that "func" runs exactly once, whatever it returns.
            return False

        gobject.idle_add(run_once, func, *args)
    else:
        func(*args)
def discover_bluetooth_devices():
    """
    This is a generator function that returns
    (address, name) tuples of all nearby bluetooth
    devices.

    If the user has python-bluez installed, it will
    be used. If not, we're trying to use "hcitool".

    If neither python-bluez or hcitool are available,
    this function is the empty generator.
    """
    devices = None

    try:
        # If the user has python-bluez installed
        import bluetooth
    except ImportError:
        bluetooth = None

    if bluetooth is not None:
        log('Using python-bluez to find nearby bluetooth devices')
        try:
            devices = bluetooth.discover_devices(lookup_names=True)
        except Exception:
            # Discovery failed; fall back to hcitool below
            devices = None

    if devices is not None:
        # The yield statements are kept outside of any "except"
        # block, so closing the generator early is not mistaken for
        # a discovery error.
        # NOTE(review): tuples are passed through unchanged; assumed
        # to be (address, name) as the docstring states - confirm.
        for device in devices:
            yield device
        return

    if find_command('hcitool') is not None:
        log('Using hcitool to find nearby bluetooth devices')
        # If the user has "hcitool" installed
        p = subprocess.Popen(['hcitool', 'scan'], stdout=subprocess.PIPE)
        for line in p.stdout:
            match = re.match('^\t([^\t]+)\t([^\t]+)\n$', line)
            if match is not None:
                (addr, name) = match.groups()
                yield (addr, name)
        # Reap the child process so that no zombie is left behind
        p.wait()
    else:
        log('Cannot find either python-bluez or hcitool - no bluetooth?')
        return # <= empty generator
def bluetooth_send_file(filename, device=None, callback_finished=None):
    """
    Sends a file via bluetooth using gnome-obex send.
    Optional parameter device is the bluetooth address
    of the device; optional parameter callback_finished
    is a callback function that will be called when the
    sending process has finished - it gets one parameter
    that is either True (when sending succeeded) or False
    when there was some error.

    This function tries to use "bluetooth-sendto", and if
    it is not available, it also tries "gnome-obex-send".

    Returns the same True/False value that is passed to the
    callback.
    """
    command_line = None

    if find_command('bluetooth-sendto'):
        command_line = ['bluetooth-sendto']
        if device is not None:
            command_line.append('--device=%s' % device)
    elif find_command('gnome-obex-send'):
        command_line = ['gnome-obex-send']
        if device is not None:
            command_line += ['--dest', device]

    if command_line is not None:
        command_line.append(filename)
        try:
            result = (subprocess.Popen(command_line).wait() == 0)
        except OSError:
            # The helper vanished or cannot be executed; report this
            # through the callback instead of raising
            log('Cannot run "%s" to send file.', command_line[0])
            result = False
    else:
        log('Cannot send file. Please install "bluetooth-sendto" or "gnome-obex-send".')
        result = False

    if callback_finished is not None:
        callback_finished(result)

    return result
def format_seconds_to_hour_min_sec(seconds):
    """
    Take the number of seconds and format it into a
    human-readable string (duration).

    >>> format_seconds_to_hour_min_sec(3834)
    '1 hour, 3 minutes and 54 seconds'
    >>> format_seconds_to_hour_min_sec(2600)
    '43 minutes and 20 seconds'
    >>> format_seconds_to_hour_min_sec(62)
    '1 minute and 2 seconds'
    """
    # Work on whole seconds only, so that fractional input cannot
    # produce results such as "1 hours"
    try:
        seconds = int(seconds)
    except (TypeError, ValueError):
        seconds = 0

    if seconds < 1:
        return _('0 seconds')

    result = []

    (hours, seconds) = divmod(seconds, 3600)
    (minutes, seconds) = divmod(seconds, 60)

    if hours == 1:
        result.append(_('1 hour'))
    elif hours > 1:
        result.append(_('%i hours') % hours)

    if minutes == 1:
        result.append(_('1 minute'))
    elif minutes > 1:
        result.append(_('%i minutes') % minutes)

    if seconds == 1:
        result.append(_('1 second'))
    elif seconds > 1:
        result.append(_('%i seconds') % seconds)

    if len(result) > 1:
        # "a, b and c": commas between all but the last two parts
        return (' '+_('and')+' ').join((', '.join(result[:-1]), result[-1]))

    return result[0]
def get_episode_info_from_url(url, proxy=None):
    """
    Try to get information about a podcast episode by sending
    a HEAD request to the HTTP server and parsing the result.

    The return value is a dict containing all fields that
    could be parsed from the URL. This currently contains:

    "length": The size of the file in bytes
    "pubdate": A formatted representation of the pubDate

    If the "proxy" parameter is used, it has to be the URL
    of the HTTP proxy server to use, e.g. http://proxy:8080/

    If there is an error, this function returns {}. This will
    only function with http:// and https:// URLs.
    """
    if not (url.startswith('http://') or url.startswith('https://')):
        return {}

    import socket

    log('Trying to get metainfo for %s', url)

    conn = None
    try:
        try:
            if proxy is None or proxy.strip() == '':
                (scheme, netloc, path, parms, qry, fragid) = urlparse.urlparse(url)
                if scheme == 'https':
                    # https:// needs a TLS connection (port 443)
                    conn = httplib.HTTPSConnection(netloc)
                else:
                    conn = httplib.HTTPConnection(netloc)
                start = len(scheme) + len('://') + len(netloc)
                # "http://host" has no path; request the root then
                conn.request('HEAD', url[start:] or '/')
            else:
                # A proxy gets the full URL in the request line
                netloc = urlparse.urlparse(proxy)[1]
                conn = httplib.HTTPConnection(netloc)
                conn.request('HEAD', url)

            headers = conn.getresponse().msg
        except (httplib.HTTPException, socket.error, EnvironmentError):
            log('Cannot get metainfo for %s', url)
            return {}
    finally:
        if conn is not None:
            conn.close()

    result = {}

    if 'content-length' in headers:
        try:
            length = int(headers['content-length'])
            result['length'] = length
        except ValueError:
            log('Error converting content-length header.')

    if 'last-modified' in headers:
        try:
            parsed_date = feedparser._parse_date(headers['last-modified'])
            pubdate = updated_parsed_to_rfc2822(parsed_date)
            result['pubdate'] = pubdate
        except Exception:
            log('Error converting last-modified header.')

    return result
def gui_open(filename):
    """
    Open a file or folder with the default application set
    by the Desktop environment. This uses "xdg-open".
    """
    try:
        subprocess.Popen(['xdg-open', filename])
        # FIXME: Win32-specific "open" code needed here
        # as fallback when xdg-open not available
    except Exception:
        # This used to reference the undefined names "folder" and
        # "self", which turned every failure into a NameError
        log('Cannot open file/folder: "%s"', filename, traceback=True)
def open_website(url):
    """
    Opens the specified URL using the default system web
    browser. This uses Python's "webbrowser" module, so
    make sure your system is set up correctly.

    The browser is started from a separate thread, so this
    function does not wait for it.
    """
    browser_thread = threading.Thread(target=webbrowser.open, args=(url,))
    browser_thread.start()
def sanitize_filename(filename):
    """
    Generate a sanitized version of a filename that can
    be written on disk (i.e. remove/replace invalid
    characters and encode in the native language)
    """
    default_encoding = 'iso-8859-15'

    # Try to detect OS encoding (by Leonid Ponomarev)
    lang = os.environ.get('LANG', '')
    if '.' in lang:
        (language, encoding) = lang.rsplit('.', 1)
        # Drop a modifier such as "@euro" ("de_DE.UTF-8@euro")
        encoding = encoding.split('@', 1)[0]
        log('Detected encoding: %s', encoding)
        enc = encoding
    else:
        # Using iso-8859-15 here as (hopefully) sane default
        # see http://en.wikipedia.org/wiki/ISO/IEC_8859-1
        log('Using ISO-8859-15 as encoding. If this')
        log('is incorrect, please set your $LANG variable.')
        enc = default_encoding

    try:
        encoded = filename.strip().encode(enc, 'ignore')
    except LookupError:
        # $LANG names a codec that Python does not know
        log('Unknown encoding "%s", using ISO-8859-15 instead.', enc)
        encoded = filename.strip().encode(default_encoding, 'ignore')

    # Replace characters that are invalid or troublesome in file names
    return re.sub(r'[/|?*<>:+\[\]"\\]', '_', encoded)