src/gpodder/util.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # gPodder - A media aggregator and podcast client
   4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
   5 #
   6 # gPodder is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 3 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # gPodder is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18 #
  19
  20 #
  21 #  util.py -- Misc utility functions
  22 #  Thomas Perl <thp@perli.net> 2007-08-04
  23 #
  24
  25 """Miscellaneous helper functions for gPodder
  26
  27 This module provides helper and utility functions for gPodder that
  28 are not tied to any specific part of gPodder.
  29
  30 """
  31
import gpodder
from gpodder.liblogger import log

import os
import os.path
import platform
import glob
import stat
import shlex
import socket

import re
import subprocess
from htmlentitydefs import entitydefs
from htmlentitydefs import name2codepoint
import time
import locale
import gzip
import datetime
import threading

import urlparse
import urllib
import urllib2
import httplib
import webbrowser
import mimetypes

import feedparser

import StringIO
import xml.dom.minidom
  63
  64 _ = gpodder.gettext
  65 N_ = gpodder.ngettext
  66
  67
  68 # Try to detect OS encoding (by Leonid Ponomarev)
  69 if gpodder.ui.maemo:
  70     encoding = 'utf8'
  71 else:
  72     encoding = 'iso-8859-15'
  73
  74 if 'LANG' in os.environ and '.' in os.environ['LANG']:
  75     lang = os.environ['LANG']
  76     (language, encoding) = lang.rsplit('.', 1)
  77     log('Detected encoding: %s', encoding)
  78     enc = encoding
  79 else:
  80     # Using iso-8859-15 here as (hopefully) sane default
  81     # see http://en.wikipedia.org/wiki/ISO/IEC_8859-1
  82     log('Using ISO-8859-15 as encoding. If this')
  83     log('is incorrect, please set your $LANG variable.')
  84
  85
  86 # Used by file_type_by_extension()
  87 _BUILTIN_FILE_TYPES = None
  88
  89
  90 def make_directory( path):
  91     """
  92     Tries to create a directory if it does not exist already.
  93     Returns True if the directory exists after the function
  94     call, False otherwise.
  95     """
  96     if os.path.isdir( path):
  97         return True
  98
  99     try:
 100         os.makedirs( path)
 101     except:
 102         log( 'Could not create directory: %s', path)
 103         return False
 104
 105     return True
 106
 107
 108 def normalize_feed_url(url):
 109     """
 110     Converts any URL to http:// or ftp:// so that it can be
 111     used with "wget". If the URL cannot be converted (invalid
 112     or unknown scheme), "None" is returned.
 113
 114     This will also normalize feed:// and itpc:// to http://
 115     Also supported are phobos.apple.com links (iTunes podcast)
 116     and itms:// links (iTunes podcast direct link).
 117
 118     >>> normalize_feed_url('itpc://example.org/podcast.rss')
 119     'http://example.org/podcast.rss'
 120
 121     If no URL scheme is defined (e.g. "curry.com"), we will
 122     simply assume the user intends to add a http:// feed.
 123
 124     >>> normalize_feed_url('curry.com')
 125     'http://curry.com'
 126
 127     There are even some more shortcuts for advanced users
 128     and lazy typists (see the source for details).
 129
 130     >>> normalize_feed_url('fb:43FPodcast')
 131     'http://feeds.feedburner.com/43FPodcast'
 132     """
 133     if not url or len(url) < 8:
 134         return None
 135
 136     # This is a list of prefixes that you can use to minimize the amount of
 137     # keystrokes that you have to use.
 138     # Feel free to suggest other useful prefixes, and I'll add them here.
 139     PREFIXES = {
 140             'fb:': 'http://feeds.feedburner.com/%s',
 141             'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
 142             'sc:': 'http://soundcloud.com/%s',
 143     }
 144
 145     for prefix, expansion in PREFIXES.iteritems():
 146         if url.startswith(prefix):
 147             url = expansion % (url[len(prefix):],)
 148             break
 149
 150     # Assume HTTP for URLs without scheme
 151     if not '://' in url:
 152         url = 'http://' + url
 153
 154     # The scheme of the URL should be all-lowercase
 155     (scheme, rest) = url.split('://', 1)
 156     scheme = scheme.lower()
 157
 158     # Remember to parse iTunes XML for itms:// URLs
 159     do_parse_itunes_xml = (scheme == 'itms')
 160
 161     # feed://, itpc:// and itms:// are really http://
 162     if scheme in ('feed', 'itpc', 'itms'):
 163         scheme = 'http'
 164
 165     # Re-assemble our URL
 166     url = scheme + '://' + rest
 167
 168     # If we had an itms:// URL, parse XML
 169     if do_parse_itunes_xml:
 170         url = parse_itunes_xml(url)
 171
 172     # Links to "phobos.apple.com"
 173     url = itunes_discover_rss(url)
 174
 175     if scheme in ('http', 'https', 'ftp'):
 176         return url
 177
 178     return None
 179
 180
 181 def username_password_from_url(url):
 182     r"""
 183     Returns a tuple (username,password) containing authentication
 184     data from the specified URL or (None,None) if no authentication
 185     data can be found in the URL.
 186
 187     See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)
 188
 189     >>> username_password_from_url('https://@host.com/')
 190     ('', None)
 191     >>> username_password_from_url('telnet://host.com/')
 192     (None, None)
 193     >>> username_password_from_url('ftp://foo:@host.com/')
 194     ('foo', '')
 195     >>> username_password_from_url('http://a:b@host.com/')
 196     ('a', 'b')
 197     >>> username_password_from_url(1)
 198     Traceback (most recent call last):
 199       ...
 200     ValueError: URL has to be a string or unicode object.
 201     >>> username_password_from_url(None)
 202     Traceback (most recent call last):
 203       ...
 204     ValueError: URL has to be a string or unicode object.
 205     >>> username_password_from_url('http://a@b:c@host.com/')
 206     Traceback (most recent call last):
 207       ...
 208     ValueError: "@" must be encoded for username/password (RFC1738).
 209     >>> username_password_from_url('ftp://a:b:c@host.com/')
 210     Traceback (most recent call last):
 211       ...
 212     ValueError: ":" must be encoded for username/password (RFC1738).
 213     >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
 214     ('i/o', 'P@ss:')
 215     >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
 216     ('\xc3\xb6sterreich', None)
 217     """
 218     if type(url) not in (str, unicode):
 219         raise ValueError('URL has to be a string or unicode object.')
 220
 221     (username, password) = (None, None)
 222
 223     (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
 224
 225     if '@' in netloc:
 226         (authentication, netloc) = netloc.rsplit('@', 1)
 227         if ':' in authentication:
 228             (username, password) = authentication.split(':', 1)
 229             # RFC1738 dictates that we should not allow these unquoted
 230             # characters in the username and password field (Section 3.1).
 231             for c in (':', '@', '/'):
 232                 if c in username or c in password:
 233                     raise ValueError('"%c" must be encoded for username/password (RFC1738).' % c)
 234             username = urllib.unquote(username)
 235             password = urllib.unquote(password)
 236         else:
 237             username = urllib.unquote(authentication)
 238
 239     return (username, password)
 240
 241
 242 def directory_is_writable( path):
 243     """
 244     Returns True if the specified directory exists and is writable
 245     by the current user.
 246     """
 247     return os.path.isdir( path) and os.access( path, os.W_OK)
 248
 249
 250 def calculate_size( path):
 251     """
 252     Tries to calculate the size of a directory, including any
 253     subdirectories found. The returned value might not be
 254     correct if the user doesn't have appropriate permissions
 255     to list all subdirectories of the given path.
 256     """
 257     if path is None:
 258         return 0L
 259
 260     if os.path.dirname( path) == '/':
 261         return 0L
 262
 263     if os.path.isfile( path):
 264         return os.path.getsize( path)
 265
 266     if os.path.isdir( path) and not os.path.islink( path):
 267         sum = os.path.getsize( path)
 268
 269         try:
 270             for item in os.listdir(path):
 271                 try:
 272                     sum += calculate_size(os.path.join(path, item))
 273                 except:
 274                     log('Cannot get size for %s', path)
 275         except:
 276             log('Cannot access: %s', path)
 277
 278         return sum
 279
 280     return 0L
 281
 282
 283 def file_modification_datetime(filename):
 284     """
 285     Returns the modification date of the specified file
 286     as a datetime.datetime object or None if the modification
 287     date cannot be determined.
 288     """
 289     if filename is None:
 290         return None
 291
 292     if not os.access(filename, os.R_OK):
 293         return None
 294
 295     try:
 296         s = os.stat(filename)
 297         timestamp = s[stat.ST_MTIME]
 298         return datetime.datetime.fromtimestamp(timestamp)
 299     except:
 300         log('Cannot get modification timestamp for %s', filename)
 301         return None
 302
 303
 304 def file_modification_timestamp(filename):
 305     """
 306     Returns the modification date of the specified file as a number
 307     or -1 if the modification date cannot be determined.
 308     """
 309     if filename is None:
 310         return -1
 311     try:
 312         s = os.stat(filename)
 313         return s[stat.ST_MTIME]
 314     except:
 315         log('Cannot get modification timestamp for %s', filename)
 316         return -1
 317
 318
 319 def file_age_in_days(filename):
 320     """
 321     Returns the age of the specified filename in days or
 322     zero if the modification date cannot be determined.
 323     """
 324     dt = file_modification_datetime(filename)
 325     if dt is None:
 326         return 0
 327     else:
 328         return (datetime.datetime.now()-dt).days
 329
 330
 331 def file_age_to_string(days):
 332     """
 333     Converts a "number of days" value to a string that
 334     can be used in the UI to display the file age.
 335
 336     >>> file_age_to_string(0)
 337     ''
 338     >>> file_age_to_string(1)
 339     u'1 day ago'
 340     >>> file_age_to_string(2)
 341     u'2 days ago'
 342     """
 343     if days < 1:
 344         return ''
 345     else:
 346         return N_('%d day ago', '%d days ago', days) % days
 347
 348
 349 def get_free_disk_space_win32(path):
 350     """
 351     Win32-specific code to determine the free disk space remaining
 352     for a given path. Uses code from:
 353
 354     http://mail.python.org/pipermail/python-list/2003-May/203223.html
 355     """
 356
 357     drive, tail = os.path.splitdrive(path)
 358
 359     try:
 360         import win32file
 361         userFree, userTotal, freeOnDisk = win32file.GetDiskFreeSpaceEx(drive)
 362         return userFree
 363     except ImportError:
 364         log('Warning: Running on Win32 but win32api/win32file not installed.')
 365
 366     # Cannot determine free disk space
 367     return 0
 368
 369
 370 def get_free_disk_space(path):
 371     """
 372     Calculates the free disk space available to the current user
 373     on the file system that contains the given path.
 374
 375     If the path (or its parent folder) does not yet exist, this
 376     function returns zero.
 377     """
 378
 379     if not os.path.exists(path):
 380         return 0
 381
 382     if gpodder.win32:
 383         return get_free_disk_space_win32(path)
 384
 385     s = os.statvfs(path)
 386
 387     return s.f_bavail * s.f_bsize
 388
 389
 390 def format_date(timestamp):
 391     """
 392     Converts a UNIX timestamp to a date representation. This
 393     function returns "Today", "Yesterday", a weekday name or
 394     the date in %x format, which (according to the Python docs)
 395     is the "Locale's appropriate date representation".
 396
 397     Returns None if there has been an error converting the
 398     timestamp to a string representation.
 399     """
 400     if timestamp is None:
 401         return None
 402
 403     seconds_in_a_day = 60*60*24
 404
 405     today = time.localtime()[:3]
 406     yesterday = time.localtime(time.time() - seconds_in_a_day)[:3]
 407     try:
 408         timestamp_date = time.localtime(timestamp)[:3]
 409     except ValueError, ve:
 410         log('Warning: Cannot convert timestamp', traceback=True)
 411         return None
 412
 413     if timestamp_date == today:
 414        return _('Today')
 415     elif timestamp_date == yesterday:
 416        return _('Yesterday')
 417
 418     try:
 419         diff = int( (time.time() - timestamp)/seconds_in_a_day )
 420     except:
 421         log('Warning: Cannot convert "%s" to date.', timestamp, traceback=True)
 422         return None
 423
 424     try:
 425         timestamp = datetime.datetime.fromtimestamp(timestamp)
 426     except:
 427         return None
 428
 429     if diff < 7:
 430         # Weekday name
 431         return str(timestamp.strftime('%A'))
 432     else:
 433         # Locale's appropriate date representation
 434         return str(timestamp.strftime('%x'))
 435
 436
 437 def format_filesize(bytesize, use_si_units=False, digits=2):
 438     """
 439     Formats the given size in bytes to be human-readable,
 440
 441     Returns a localized "(unknown)" string when the bytesize
 442     has a negative value.
 443     """
 444     si_units = (
 445             ( 'kB', 10**3 ),
 446             ( 'MB', 10**6 ),
 447             ( 'GB', 10**9 ),
 448     )
 449
 450     binary_units = (
 451             ( 'KiB', 2**10 ),
 452             ( 'MiB', 2**20 ),
 453             ( 'GiB', 2**30 ),
 454     )
 455
 456     try:
 457         bytesize = float( bytesize)
 458     except:
 459         return _('(unknown)')
 460
 461     if bytesize < 0:
 462         return _('(unknown)')
 463
 464     if use_si_units:
 465         units = si_units
 466     else:
 467         units = binary_units
 468
 469     ( used_unit, used_value ) = ( 'B', bytesize )
 470
 471     for ( unit, value ) in units:
 472         if bytesize >= value:
 473             used_value = bytesize / float(value)
 474             used_unit = unit
 475
 476     return ('%.'+str(digits)+'f %s') % (used_value, used_unit)
 477
 478
 479 def delete_file( path):
 480     """
 481     Tries to delete the given filename and silently
 482     ignores deletion errors (if the file doesn't exist).
 483     Also deletes extracted cover files if they exist.
 484     """
 485     log( 'Trying to delete: %s', path)
 486     try:
 487         os.unlink( path)
 488         # Remove any extracted cover art that might exist
 489         for cover_file in glob.glob( '%s.cover.*' % ( path, )):
 490             os.unlink( cover_file)
 491
 492     except:
 493         pass
 494
 495
 496
 497 def remove_html_tags(html):
 498     """
 499     Remove HTML tags from a string and replace numeric and
 500     named entities with the corresponding character, so the
 501     HTML text can be displayed in a simple text view.
 502     """
 503     # If we would want more speed, we could make these global
 504     re_strip_tags = re.compile('<[^>]*>')
 505     re_unicode_entities = re.compile('&#(\d{2,4});')
 506     re_html_entities = re.compile('&(.{2,8});')
 507     re_newline_tags = re.compile('(<br[^>]*>|<[/]?ul[^>]*>|</li>)', re.I)
 508     re_listing_tags = re.compile('<li[^>]*>', re.I)
 509
 510     result = html
 511
 512     # Convert common HTML elements to their text equivalent
 513     result = re_newline_tags.sub('\n', result)
 514     result = re_listing_tags.sub('\n * ', result)
 515     result = re.sub('<[Pp]>', '\n\n', result)
 516
 517     # Remove all HTML/XML tags from the string
 518     result = re_strip_tags.sub('', result)
 519
 520     # Convert numeric XML entities to their unicode character
 521     result = re_unicode_entities.sub(lambda x: unichr(int(x.group(1))), result)
 522
 523     # Convert named HTML entities to their unicode character
 524     result = re_html_entities.sub(lambda x: unicode(entitydefs.get(x.group(1),''), 'iso-8859-1'), result)
 525
 526     # Convert more than two newlines to two newlines
 527     result = re.sub('([\r\n]{2})([\r\n])+', '\\1', result)
 528
 529     return result.strip()
 530
 531
 532 def extension_from_mimetype(mimetype):
 533     """
 534     Simply guesses what the file extension should be from the mimetype
 535     """
 536     return mimetypes.guess_extension(mimetype) or ''
 537
 538
 539 def extension_correct_for_mimetype(extension, mimetype):
 540     """
 541     Check if the given filename extension (e.g. ".ogg") is a possible
 542     extension for a given mimetype (e.g. "application/ogg") and return
 543     a boolean value (True if it's possible, False if not). Also do
 544
 545     >>> extension_correct_for_mimetype('.ogg', 'application/ogg')
 546     True
 547     >>> extension_correct_for_mimetype('.ogv', 'video/ogg')
 548     True
 549     >>> extension_correct_for_mimetype('.ogg', 'audio/mpeg')
 550     False
 551     >>> extension_correct_for_mimetype('mp3', 'audio/mpeg')
 552     Traceback (most recent call last):
 553       ...
 554     ValueError: "mp3" is not an extension (missing .)
 555     >>> extension_correct_for_mimetype('.mp3', 'audio mpeg')
 556     Traceback (most recent call last):
 557       ...
 558     ValueError: "audio mpeg" is not a mimetype (missing /)
 559     """
 560     if not '/' in mimetype:
 561         raise ValueError('"%s" is not a mimetype (missing /)' % mimetype)
 562     if not extension.startswith('.'):
 563         raise ValueError('"%s" is not an extension (missing .)' % extension)
 564
 565     # Create a "default" extension from the mimetype, e.g. "application/ogg"
 566     # becomes ".ogg", "audio/mpeg" becomes ".mpeg", etc...
 567     default = ['.'+mimetype.split('/')[-1]]
 568
 569     return extension in default+mimetypes.guess_all_extensions(mimetype)
 570
 571
 572 def filename_from_url(url):
 573     """
 574     Extracts the filename and (lowercase) extension (with dot)
 575     from a URL, e.g. http://server.com/file.MP3?download=yes
 576     will result in the string ("file", ".mp3") being returned.
 577
 578     This function will also try to best-guess the "real"
 579     extension for a media file (audio, video) by
 580     trying to match an extension to these types and recurse
 581     into the query string to find better matches, if the
 582     original extension does not resolve to a known type.
 583
 584     http://my.net/redirect.php?my.net/file.ogg => ("file", ".ogg")
 585     http://server/get.jsp?file=/episode0815.MOV => ("episode0815", ".mov")
 586     http://s/redirect.mp4?http://serv2/test.mp4 => ("test", ".mp4")
 587     """
 588     (scheme, netloc, path, para, query, fragid) = urlparse.urlparse(url)
 589     (filename, extension) = os.path.splitext(os.path.basename( urllib.unquote(path)))
 590
 591     if file_type_by_extension(extension) is not None and not \
 592         query.startswith(scheme+'://'):
 593         # We have found a valid extension (audio, video)
 594         # and the query string doesn't look like a URL
 595         return ( filename, extension.lower() )
 596
 597     # If the query string looks like a possible URL, try that first
 598     if len(query.strip()) > 0 and query.find('/') != -1:
 599         query_url = '://'.join((scheme, urllib.unquote(query)))
 600         (query_filename, query_extension) = filename_from_url(query_url)
 601
 602         if file_type_by_extension(query_extension) is not None:
 603             return os.path.splitext(os.path.basename(query_url))
 604
 605     # No exact match found, simply return the original filename & extension
 606     return ( filename, extension.lower() )
 607
 608
 609 def file_type_by_extension(extension):
 610     """
 611     Tries to guess the file type by looking up the filename
 612     extension from a table of known file types. Will return
 613     "audio", "video" or None.
 614
 615     >>> file_type_by_extension('.aif')
 616     'audio'
 617     >>> file_type_by_extension('.3GP')
 618     'video'
 619     >>> file_type_by_extension('.txt') is None
 620     True
 621     >>> file_type_by_extension(None) is None
 622     True
 623     >>> file_type_by_extension('ogg')
 624     Traceback (most recent call last):
 625       ...
 626     ValueError: Extension does not start with a dot: ogg
 627     """
 628     if not extension:
 629         return None
 630
 631     if not extension.startswith('.'):
 632         raise ValueError('Extension does not start with a dot: %s' % extension)
 633
 634     global _BUILTIN_FILE_TYPES
 635     if _BUILTIN_FILE_TYPES is None:
 636         # List all types that are not in the default mimetypes.types_map
 637         # (even if they might be detected by mimetypes.guess_type)
 638         # For OGG, see http://wiki.xiph.org/MIME_Types_and_File_Extensions
 639         audio_types = ('.ogg', '.oga', '.spx', '.flac', '.axa', \
 640                        '.aac', '.m4a', '.m4b', '.wma')
 641         video_types = ('.ogv', '.axv', '.mp4', \
 642                        '.mkv', '.m4v', '.divx', '.flv', '.wmv', '.3gp')
 643         _BUILTIN_FILE_TYPES = {}
 644         _BUILTIN_FILE_TYPES.update((ext, 'audio') for ext in audio_types)
 645         _BUILTIN_FILE_TYPES.update((ext, 'video') for ext in video_types)
 646
 647     extension = extension.lower()
 648
 649     if extension in _BUILTIN_FILE_TYPES:
 650         return _BUILTIN_FILE_TYPES[extension]
 651
 652     # Need to prepend something to the extension, so guess_type works
 653     type, encoding = mimetypes.guess_type('file'+extension)
 654
 655     if type is not None and '/' in type:
 656         filetype, rest = type.split('/', 1)
 657         if filetype in ('audio', 'video', 'image'):
 658             return filetype
 659
 660     return None
 661
 662
 663 def get_first_line( s):
 664     """
 665     Returns only the first line of a string, stripped so
 666     that it doesn't have whitespace before or after.
 667     """
 668     return s.strip().split('\n')[0].strip()
 669
 670
 671 def object_string_formatter( s, **kwargs):
 672     """
 673     Makes attributes of object passed in as keyword
 674     arguments available as {OBJECTNAME.ATTRNAME} in
 675     the passed-in string and returns a string with
 676     the above arguments replaced with the attribute
 677     values of the corresponding object.
 678
 679     Example:
 680
 681     e = Episode()
 682     e.title = 'Hello'
 683     s = '{episode.title} World'
 684
 685     print object_string_formatter( s, episode = e)
 686           => 'Hello World'
 687     """
 688     result = s
 689     for ( key, o ) in kwargs.items():
 690         matches = re.findall( r'\{%s\.([^\}]+)\}' % key, s)
 691         for attr in matches:
 692             if hasattr( o, attr):
 693                 try:
 694                     from_s = '{%s.%s}' % ( key, attr )
 695                     to_s = getattr( o, attr)
 696                     result = result.replace( from_s, to_s)
 697                 except:
 698                     log( 'Could not replace attribute "%s" in string "%s".', attr, s)
 699
 700     return result
 701
 702
 703 def format_desktop_command(command, filenames):
 704     """
 705     Formats a command template from the "Exec=" line of a .desktop
 706     file to a string that can be invoked in a shell.
 707
 708     Handled format strings: %U, %u, %F, %f and a fallback that
 709     appends the filename as first parameter of the command.
 710
 711     See http://standards.freedesktop.org/desktop-entry-spec/1.0/ar01s06.html
 712
 713     Returns a list of commands to execute, either one for
 714     each filename if the application does not support multiple
 715     file names or one for all filenames (%U, %F or unknown).
 716     """
 717     command = shlex.split(command)
 718
 719     command_before = command
 720     command_after = []
 721     multiple_arguments = True
 722     for fieldcode in ('%U', '%F', '%u', '%f'):
 723         if fieldcode in command:
 724             command_before = command[:command.index(fieldcode)]
 725             command_after = command[command.index(fieldcode)+1:]
 726             multiple_arguments = fieldcode in ('%U', '%F')
 727             break
 728
 729     if multiple_arguments:
 730         return [command_before + filenames + command_after]
 731
 732     commands = []
 733     for filename in filenames:
 734         commands.append(command_before+[filename]+command_after)
 735
 736     return commands
 737
 738 def url_strip_authentication(url):
 739     """
 740     Strips authentication data from an URL. Returns the URL with
 741     the authentication data removed from it.
 742
 743     >>> url_strip_authentication('https://host.com/')
 744     'https://host.com/'
 745     >>> url_strip_authentication('telnet://foo:bar@host.com/')
 746     'telnet://host.com/'
 747     >>> url_strip_authentication('ftp://billy@example.org')
 748     'ftp://example.org'
 749     >>> url_strip_authentication('ftp://billy:@example.org')
 750     'ftp://example.org'
 751     >>> url_strip_authentication('http://aa:bc@localhost/x')
 752     'http://localhost/x'
 753     >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
 754     'http://blubb.lan/u.html'
 755     >>> url_strip_authentication('http://c:d@x.org/')
 756     'http://x.org/'
 757     >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
 758     'http://cx.lan'
 759     """
 760     url_parts = list(urlparse.urlsplit(url))
 761     # url_parts[1] is the HOST part of the URL
 762
 763     # Remove existing authentication data
 764     if '@' in url_parts[1]:
 765         url_parts[1] = url_parts[1].split('@', 2)[1]
 766
 767     return urlparse.urlunsplit(url_parts)
 768
 769
 770 def url_add_authentication(url, username, password):
 771     """
 772     Adds authentication data (username, password) to a given
 773     URL in order to construct an authenticated URL.
 774
 775     >>> url_add_authentication('https://host.com/', '', None)
 776     'https://host.com/'
 777     >>> url_add_authentication('http://example.org/', None, None)
 778     'http://example.org/'
 779     >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
 780     'telnet://foo:bar@host.com/'
 781     >>> url_add_authentication('ftp://example.org', 'billy', None)
 782     'ftp://billy@example.org'
 783     >>> url_add_authentication('ftp://example.org', 'billy', '')
 784     'ftp://billy:@example.org'
 785     >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
 786     'http://aa:bc@localhost/x'
 787     >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
 788     'http://i%2Fo:P%40ss%3A@blubb.lan/u.html'
 789     >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
 790     'http://c:d@x.org/'
 791     >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@:', 'i/')
 792     'http://P%40%3A:i%2F@cx.lan'
 793     """
 794     if username is None or username == '':
 795         return url
 796
 797     username = urllib.quote_plus(username)
 798
 799     if password is not None:
 800         password = urllib.quote_plus(password)
 801         auth_string = ':'.join((username, password))
 802     else:
 803         auth_string = username
 804
 805     url = url_strip_authentication(url)
 806
 807     url_parts = list(urlparse.urlsplit(url))
 808     # url_parts[1] is the HOST part of the URL
 809     url_parts[1] = '@'.join((auth_string, url_parts[1]))
 810
 811     return urlparse.urlunsplit(url_parts)
 812
 813
 814 def get_real_url(url):
 815     """
 816     Gets the real URL of a file and resolves all redirects.
 817     """
 818     try:
 819         username, password = username_password_from_url(url)
 820         if username or password:
 821             url = url_strip_authentication(url)
 822             log('url=%s, username=%s, password=%s', url, username, password)
 823             password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 824             password_mgr.add_password(None, url, username, password)
 825             handler = urllib2.HTTPBasicAuthHandler(password_mgr)
 826             opener = urllib2.build_opener(handler)
 827             return opener.open(url).geturl()
 828         else:
 829             return urlopen(url).geturl()
 830     except:
 831         log('Error getting real url for %s', url, traceback=True)
 832         return url
 833
 834 def urlopen(url):
 835     """
 836     An URL opener with the User-agent set to gPodder (with version)
 837     """
 838     headers = {'User-agent': gpodder.user_agent}
 839     request = urllib2.Request(url, headers=headers)
 840     return urllib2.urlopen(request)
 841
 842 def find_command( command):
 843     """
 844     Searches the system's PATH for a specific command that is
 845     executable by the user. Returns the first occurence of an
 846     executable binary in the PATH, or None if the command is
 847     not available.
 848     """
 849
 850     if 'PATH' not in os.environ:
 851         return None
 852
 853     for path in os.environ['PATH'].split( os.pathsep):
 854         command_file = os.path.join( path, command)
 855         if os.path.isfile( command_file) and os.access( command_file, os.X_OK):
 856             return command_file
 857
 858     return None
 859
 860
 861 def parse_itunes_xml(url):
 862     """
 863     Parses an XML document in the "url" parameter (this has to be
 864     a itms:// or http:// URL to a XML doc) and searches all "<dict>"
 865     elements for the first occurence of a "<key>feedURL</key>"
 866     element and then continues the search for the string value of
 867     this key.
 868
 869     This returns the RSS feed URL for Apple iTunes Podcast XML
 870     documents that are retrieved by itunes_discover_rss().
 871     """
 872     url = url.replace('itms://', 'http://')
 873     doc = http_get_and_gunzip(url)
 874     try:
 875         d = xml.dom.minidom.parseString(doc)
 876     except Exception, e:
 877         log('Error parsing document from itms:// URL: %s', e)
 878         return None
 879     last_key = None
 880     for pairs in d.getElementsByTagName('dict'):
 881         for node in pairs.childNodes:
 882             if node.nodeType != node.ELEMENT_NODE:
 883                 continue
 884
 885             if node.tagName == 'key' and node.childNodes.length > 0:
 886                 if node.firstChild.nodeType == node.TEXT_NODE:
 887                     last_key = node.firstChild.data
 888
 889             if last_key != 'feedURL':
 890                 continue
 891
 892             if node.tagName == 'string' and node.childNodes.length > 0:
 893                 if node.firstChild.nodeType == node.TEXT_NODE:
 894                     return node.firstChild.data
 895
 896     return None
 897
 898
 899 def http_get_and_gunzip(uri):
 900     """
 901     Does a HTTP GET request and tells the server that we accept
 902     gzip-encoded data. This is necessary, because the Apple iTunes
 903     server will always return gzip-encoded data, regardless of what
 904     we really request.
 905
 906     Returns the uncompressed document at the given URI.
 907     """
 908     request = urllib2.Request(uri)
 909     request.add_header("Accept-encoding", "gzip")
 910     usock = urllib2.urlopen(request)
 911     data = usock.read()
 912     if usock.headers.get('content-encoding', None) == 'gzip':
 913         data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
 914     return data
 915
 916
 917 def itunes_discover_rss(url):
 918     """
 919     Takes an iTunes-specific podcast URL and turns it
 920     into a "normal" RSS feed URL. If the given URL is
 921     not a phobos.apple.com URL, we will simply return
 922     the URL and assume it's already an RSS feed URL.
 923
 924     Idea from Andrew Clarke's itunes-url-decoder.py
 925     """
 926
 927     if url is None:
 928         return url
 929
 930     if not 'phobos.apple.com' in url.lower():
 931         # This doesn't look like an iTunes URL
 932         return url
 933
 934     try:
 935         data = http_get_and_gunzip(url)
 936         (url,) = re.findall("itmsOpen\('([^']*)", data)
 937         return parse_itunes_xml(url)
 938     except:
 939         return None
 940
 941
 942 def idle_add(func, *args):
 943     """
 944     This is a wrapper function that does the Right
 945     Thing depending on if we are running a GTK+ GUI or
 946     not. If not, we're simply calling the function.
 947
 948     If we are a GUI app, we use gobject.idle_add() to
 949     call the function later - this is needed for
 950     threads to be able to modify GTK+ widget data.
 951     """
 952     if gpodder.ui.desktop or gpodder.ui.maemo:
 953         import gobject
 954         def x(f, *a):
 955             f(*a)
 956             return False
 957
 958         gobject.idle_add(func, *args)
 959     else:
 960         func(*args)
 961
 962
 963 def bluetooth_available():
 964     """
 965     Returns True or False depending on the availability
 966     of bluetooth functionality on the system.
 967     """
 968     if find_command('bluetooth-sendto') or \
 969             find_command('gnome-obex-send'):
 970         return True
 971     else:
 972         return False
 973
 974
 975 def bluetooth_send_file(filename):
 976     """
 977     Sends a file via bluetooth.
 978
 979     This function tries to use "bluetooth-sendto", and if
 980     it is not available, it also tries "gnome-obex-send".
 981     """
 982     command_line = None
 983
 984     if find_command('bluetooth-sendto'):
 985         command_line = ['bluetooth-sendto']
 986     elif find_command('gnome-obex-send'):
 987         command_line = ['gnome-obex-send']
 988
 989     if command_line is not None:
 990         command_line.append(filename)
 991         return (subprocess.Popen(command_line).wait() == 0)
 992     else:
 993         log('Cannot send file. Please install "bluetooth-sendto" or "gnome-obex-send".')
 994         return False
 995
 996
 997 def format_seconds_to_hour_min_sec(seconds):
 998     """
 999     Take the number of seconds and format it into a
1000     human-readable string (duration).
1001
1002     >>> format_seconds_to_hour_min_sec(3834)
1003     u'1 hour, 3 minutes and 54 seconds'
1004     >>> format_seconds_to_hour_min_sec(3600)
1005     u'1 hour'
1006     >>> format_seconds_to_hour_min_sec(62)
1007     u'1 minute and 2 seconds'
1008     """
1009
1010     if seconds < 1:
1011         return N_('%d second', '%d seconds', seconds) % seconds
1012
1013     result = []
1014
1015     seconds = int(seconds)
1016
1017     hours = seconds/3600
1018     seconds = seconds%3600
1019
1020     minutes = seconds/60
1021     seconds = seconds%60
1022
1023     if hours:
1024         result.append(N_('%d hour', '%d hours', hours) % hours)
1025
1026     if minutes:
1027         result.append(N_('%d minute', '%d minutes', minutes) % minutes)
1028
1029     if seconds:
1030         result.append(N_('%d second', '%d seconds', seconds) % seconds)
1031
1032     if len(result) > 1:
1033         return (' '+_('and')+' ').join((', '.join(result[:-1]), result[-1]))
1034     else:
1035         return result[0]
1036
def http_request(url, method='HEAD'):
    """
    Send a HTTP request (by default a HEAD request) for the
    given http:// or https:// URL and return the response
    object (as returned by the connection's getresponse()).

    https:// URLs are requested using a HTTPS connection, all
    other URLs using a plain HTTP connection. Connection
    errors are not handled here and propagate to the caller.
    """
    parsed = urlparse.urlparse(url)
    scheme, netloc = parsed[0], parsed[1]

    if scheme == 'https':
        conn = httplib.HTTPSConnection(netloc)
    else:
        conn = httplib.HTTPConnection(netloc)

    # Everything after the host part is sent as request target
    start = len(scheme) + len('://') + len(netloc)
    target = url[start:]

    # The request target must not be empty and has to start with a
    # slash (e.g. for "http://example.org" or "http://example.org?x=1")
    if not target.startswith('/'):
        target = '/' + target

    conn.request(method, target)
    return conn.getresponse()

1043
def get_episode_info_from_url(url):
    """
    Try to get information about a podcast episode by sending
    a HEAD request to the HTTP server and parsing the result.

    The return value is a dict containing all fields that
    could be parsed from the URL. This currently contains:

      "length": The size of the file in bytes
      "pubdate": The unix timestamp for the pubdate

    If there is an error, this function returns {}. This will
    only function with http:// and https:// URLs.
    """
    if not (url.startswith('http://') or url.startswith('https://')):
        return {}

    log('Trying to get metainfo for %s', url)

    try:
        r = http_request(url)
    except Exception:
        # Network problems must not propagate to the caller, as
        # an empty result is the documented way to report errors
        log('Error sending HEAD request to %s', url, traceback=True)
        return {}

    result = {}

    if 'content-length' in r.msg:
        try:
            result['length'] = int(r.msg['content-length'])
        except ValueError:
            log('Error converting content-length header.')

    if 'last-modified' in r.msg:
        try:
            # The date parser can fail to parse the header value,
            # in which case the conversion below raises an error
            parsed_date = feedparser._parse_date(r.msg['last-modified'])
            result['pubdate'] = time.mktime(parsed_date)
        except Exception:
            log('Error converting last-modified header.')

    return result
1082
1083
def gui_open(filename):
    """
    Open a file or folder with the default application set
    by the Desktop environment. This uses "xdg-open" on all
    systems with a few exceptions:

       on Win32, os.startfile() is used
       on Maemo, osso is used to communicate with Nokia Media Player

    Returns True if the request to open the file could be
    issued, and False if there was an error (the error is
    written to the log).
    """
    try:
        if gpodder.ui.maemo:
            try:
                import osso
            except ImportError:
                log('Cannot import osso module on maemo.')
                return False

            log('Using Nokia Media Player to open %s', filename)
            context = osso.Context('gPodder', gpodder.__version__, False)
            filename = filename.encode('utf-8')

            # Fix for Maemo bug 7162 (for local files with "#" in filename)
            if filename.startswith('/'):
                filename = 'file://' + urllib.quote(filename)

            rpc = osso.Rpc(context)
            app = 'mediaplayer'

            _unneeded, extension = os.path.splitext(filename.lower())

            # Fix for Maemo bug 5588 (use PDF viewer and images app)
            if extension == '.pdf':
                app = 'osso_pdfviewer'
            elif extension in ('.jpg', '.jpeg', '.png'):
                app = 'image_viewer'

            # Service name and object path are both derived from the app
            svc, path = (x % app for x in ('com.nokia.%s', '/com/nokia/%s'))
            rpc.rpc_run(svc, path, svc, 'mime_open', (filename,))
        elif gpodder.win32:
            os.startfile(filename)
        else:
            # The process is not waited for, so a True return value
            # only tells us that xdg-open could be started
            subprocess.Popen(['xdg-open', filename])
        return True
    except Exception:
        # Not a bare "except:", so that KeyboardInterrupt and
        # SystemExit are not swallowed here
        log('Cannot open file/folder: "%s"', filename, traceback=True)
        return False
1130
1131
def open_website(url):
    """
    Show the given URL in a web browser.

    On Maemo, the "osso_browser" service is asked to open the
    URL in a new window. On all other systems, Python's
    "webbrowser" module is used (from a separate thread), so
    make sure your system is set up correctly.
    """
    if not gpodder.ui.maemo:
        browser_thread = threading.Thread(target=webbrowser.open, args=(url,))
        browser_thread.start()
        return

    import osso
    osso_context = osso.Context('gPodder', gpodder.__version__, False)
    osso_rpc = osso.Rpc(osso_context)
    osso_rpc.rpc_run_with_defaults('osso_browser', 'open_new_window', (url,))

1147
def sanitize_encoding(filename):
    r"""
    Return the given string as byte string in the detected
    native language encoding. Byte strings are decoded using
    that encoding first; characters that are invalid in the
    encoding are dropped.

    >>> sanitize_encoding('\x80')
    ''
    >>> sanitize_encoding(u'unicode')
    'unicode'
    """
    if isinstance(filename, unicode):
        text = filename
    else:
        text = filename.decode(encoding, 'ignore')

    return text.encode(encoding, 'ignore')
1163
1164
def sanitize_filename(filename, max_length=0, use_ascii=False):
    """
    Turn a string into a filename that can be written to disk.

    The name is cut off after max_length characters (0 = no
    limit), surrounding whitespace is removed and the result
    is encoded in the native language encoding (dropping
    characters that can't be encoded). Characters that are
    not wanted in filenames are replaced with underscores.

    If use_ascii is True, the result is encoded using only
    characters from the ASCII character set instead of the
    native language encoding.
    """
    # Byte strings are always assumed to be in the native encoding
    if isinstance(filename, unicode):
        name = filename
    else:
        name = filename.decode(encoding, 'ignore')

    if 0 < max_length < len(name):
        log('Limiting file/folder name "%s" to %d characters.', name, max_length)
        name = name[:max_length]

    if use_ascii:
        target_encoding = 'ascii'
    else:
        target_encoding = encoding

    encoded_name = name.strip().encode(target_encoding, 'ignore')
    return re.sub('[/|?*<>:+\[\]\"\\\]', '_', encoded_name)
1189
1190
def find_mount_point(directory):
    """
    Determine the mount point on which a directory resides.

    The absolute path of the directory is checked first. As
    long as the path is not a mount point, its last component
    is removed and the check is repeated. For directories on
    the root filesystem, the result is "/".

    Only str objects are accepted as directory names, for
    everything else a ValueError is raised.

    >>> find_mount_point('/')
    '/'

    >>> find_mount_point(u'/something')
    Traceback (most recent call last):
      ...
    ValueError: Convert unicode objects to str first.

    >>> find_mount_point(None)
    Traceback (most recent call last):
      ...
    ValueError: Directory names should be of type str.

    >>> find_mount_point(42)
    Traceback (most recent call last):
      ...
    ValueError: Directory names should be of type str.

    >>> from minimock import mock, restore
    >>> mocked_mntpoints = ('/', '/home', '/media/usbdisk', '/media/cdrom')
    >>> mock('os.path.ismount', returns_func=lambda x: x in mocked_mntpoints)
    >>>
    >>> # For mocking os.getcwd(), we simply use a lambda to avoid the
    >>> # massive output of "Called os.getcwd()" lines in this doctest
    >>> os.getcwd = lambda: '/home/thp'
    >>>
    >>> find_mount_point('.')
    Called os.path.ismount('/home/thp')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('relativity')
    Called os.path.ismount('/home/thp/relativity')
    Called os.path.ismount('/home/thp')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('/media/usbdisk/')
    Called os.path.ismount('/media/usbdisk')
    '/media/usbdisk'
    >>> find_mount_point('/home/thp/Desktop')
    Called os.path.ismount('/home/thp/Desktop')
    Called os.path.ismount('/home/thp')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('/media/usbdisk/Podcasts/With Spaces')
    Called os.path.ismount('/media/usbdisk/Podcasts/With Spaces')
    Called os.path.ismount('/media/usbdisk/Podcasts')
    Called os.path.ismount('/media/usbdisk')
    '/media/usbdisk'
    >>> find_mount_point('/home/')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('/media/cdrom/../usbdisk/blubb//')
    Called os.path.ismount('/media/usbdisk/blubb')
    Called os.path.ismount('/media/usbdisk')
    '/media/usbdisk'
    >>> restore()
    """
    # Unicode strings are rejected, because converting them to some
    # native encoding could fail. Fail loudly instead, so that the
    # caller has to take care of encoding the directory name properly.
    if isinstance(directory, unicode):
        raise ValueError('Convert unicode objects to str first.')

    if not isinstance(directory, str):
        raise ValueError('Directory names should be of type str.')

    candidate = os.path.abspath(directory)

    # Walk up towards the root until a mount point is reached; the
    # root itself is never checked, it is simply the final answer
    while candidate != '/' and not os.path.ismount(candidate):
        candidate = os.path.dirname(candidate)

    return candidate
1274
1275
# Matches strings that begin with a protocol prefix, i.e. one or more
# word characters followed by "://" (e.g. http://, ftp:// or ldap://)
protocolPattern = re.compile(r'^\w+://')
1278
def isabs(string):
    """
    Check if a string is an absolute path or a protocol address.

    Addresses beginning in http:// or ftp:// or ldap:// (or any
    other protocol prefix) are considered "absolute" paths.

    @return 1 for protocol addresses, and the result of
    os.path.isabs() for everything else
    Source: http://code.activestate.com/recipes/208993/
    """
    if protocolPattern.match(string) is not None:
        return 1

    return os.path.isabs(string)

1288
def rel2abs(path, base=os.curdir):
    """ converts a relative path to an absolute path.

    @param path the path to convert - if it is already absolute
    (as determined by isabs()), it is returned without conversion.
    @param base - optional. Defaults to the current directory.
    The path is joined to the base before it is made absolute.
    @return the absolute path of path, relative to base
    Source: http://code.activestate.com/recipes/208993/
    """
    if not isabs(path):
        return os.path.abspath(os.path.join(base, path))

    return path

1302
def commonpath(l1, l2, common=None):
    """
    helper function for relpath

    Splits off the leading items that two sequences of path
    components have in common.

    @param l1 first sequence of path components
    @param l2 second sequence of path components
    @param common - optional. Components that are already known
    to be common; the newly found ones are added after them.
    @return a tuple (common, rest1, rest2) - a new list with the
    common components, followed by the remaining parts of l1
    and l2 that come after the common prefix
    Source: http://code.activestate.com/recipes/208993/
    """
    # A mutable default value would be shared between all calls (and
    # handed out to the callers), so a fresh list is created instead
    if common is None:
        common = []

    # Count the matching leading components in a loop, so that long
    # paths are neither copied repeatedly nor limited by recursion
    shared = 0
    limit = min(len(l1), len(l2))
    while shared < limit and l1[shared] == l2[shared]:
        shared += 1

    return (list(common) + list(l1[:shared]), l1[shared:], l2[shared:])

1312
def relpath(p1, p2):
    """
    Finds relative path from p1 to p2

    Both paths are split at the path separator and their common
    leading components are removed. The result goes up one level
    ("..") for every remaining component of p1 and then descends
    into the remaining components of p2.

    @return the relative path, or "." if nothing remains of
    either path
    Source: http://code.activestate.com/recipes/208993/
    """
    pathsplit = lambda s: s.split(os.path.sep)

    (common, l1, l2) = commonpath(pathsplit(p1), pathsplit(p2))
    p = []
    if l1:
        p = [('..' + os.sep) * len(l1)]
    p = p + l2

    # Test for emptiness instead of comparing the length by identity
    # ("is 0"), which depends on an implementation detail of integers
    if not p:
        return "."

    return os.path.join(*p)
1329
1330
def run_external_command(command_line):
    """
    Run an external command (specified by command_line) using
    the shell. The command is started from a new thread, so
    this function does not wait for the command to finish.

    The outcome is written to the log when the command exits:
    Exit status 127 is logged as "command not found", 126 as
    "permission denied", every other status above zero as
    error and everything else as success.

    >>> from minimock import mock, Mock, restore
    >>> mock('subprocess.Popen', returns=Mock('subprocess.Popen'))
    >>> run_external_command('testprogramm')
    Called subprocess.Popen('testprogramm', shell=True)
    Called subprocess.Popen.wait()
    >>> restore()
    """

    def run_and_log(cmd):
        log('Running external command: %s', cmd)
        exit_status = subprocess.Popen(cmd, shell=True).wait()

        if exit_status == 127:
            log('Command not found: %s', cmd)
        elif exit_status == 126:
            log('Command permission denied: %s', cmd)
        elif exit_status > 0:
            log('Command returned an error (%d): %s', exit_status, cmd)
        else:
            log('Command finished successfully: %s', cmd)

    worker = threading.Thread(target=run_and_log, args=(command_line,))
    worker.start()

1363
def get_hostname():
    """Return the hostname of this computer

    This can be implemented in a different way on each
    platform and should yield a unique-per-user device ID.

    The node name reported by the platform module is used if
    it is not empty, otherwise the socket module is asked.
    """
    # Fallback - but can this give us "localhost"?
    return platform.node() or socket.gethostname()
1377