src/gpodder/util.py

   1 # -*- coding: utf-8 -*-
   2 #
   3 # gPodder - A media aggregator and podcast client
   4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
   5 #
   6 # gPodder is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 3 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # gPodder is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18 #
  19
  20 #
  21 #  util.py -- Misc utility functions
  22 #  Thomas Perl <thp@perli.net> 2007-08-04
  23 #
  24
  25 """Miscellaneous helper functions for gPodder
  26
  27 This module provides helper and utility functions for gPodder that
  28 are not tied to any specific part of gPodder.
  29
  30 """
  31
  32 import gpodder
  33 from gpodder.liblogger import log
  34
  35 import os
  36 import os.path
  37 import platform
  38 import glob
  39 import stat
  40 import shlex
  41
  42 import re
  43 import subprocess
  44 from htmlentitydefs import entitydefs
  45 import time
  46 import locale
  47 import gzip
  48 import datetime
  49 import threading
  50
  51 import urlparse
  52 import urllib
  53 import urllib2
  54 import httplib
  55 import webbrowser
  56 import mimetypes
  57
  58 import feedparser
  59
  60 import StringIO
  61 import xml.dom.minidom
  62
  63 _ = gpodder.gettext
  64 N_ = gpodder.ngettext
  65
  66
  67 # Try to detect OS encoding (by Leonid Ponomarev)
  68 if gpodder.ui.maemo:
  69     encoding = 'utf8'
  70 else:
  71     encoding = 'iso-8859-15'
  72
  73 if 'LANG' in os.environ and '.' in os.environ['LANG']:
  74     lang = os.environ['LANG']
  75     (language, encoding) = lang.rsplit('.', 1)
  76     log('Detected encoding: %s', encoding)
  77     enc = encoding
  78 else:
  79     # Using iso-8859-15 here as (hopefully) sane default
  80     # see http://en.wikipedia.org/wiki/ISO/IEC_8859-1
  81     log('Using ISO-8859-15 as encoding. If this')
  82     log('is incorrect, please set your $LANG variable.')
  83
  84
  85 # Used by file_type_by_extension()
  86 _BUILTIN_FILE_TYPES = None
  87
  88
  89 def make_directory( path):
  90     """
  91     Tries to create a directory if it does not exist already.
  92     Returns True if the directory exists after the function
  93     call, False otherwise.
  94     """
  95     if os.path.isdir( path):
  96         return True
  97
  98     try:
  99         os.makedirs( path)
 100     except:
 101         log( 'Could not create directory: %s', path)
 102         return False
 103
 104     return True
 105
 106
 107 def normalize_feed_url(url):
 108     """
 109     Converts any URL to http:// or ftp:// so that it can be
 110     used with "wget". If the URL cannot be converted (invalid
 111     or unknown scheme), "None" is returned.
 112
 113     This will also normalize feed:// and itpc:// to http://
 114     Also supported are phobos.apple.com links (iTunes podcast)
 115     and itms:// links (iTunes podcast direct link).
 116
 117     >>> normalize_feed_url('itpc://example.org/podcast.rss')
 118     'http://example.org/podcast.rss'
 119
 120     If no URL scheme is defined (e.g. "curry.com"), we will
 121     simply assume the user intends to add a http:// feed.
 122
 123     >>> normalize_feed_url('curry.com')
 124     'http://curry.com'
 125
 126     There are even some more shortcuts for advanced users
 127     and lazy typists (see the source for details).
 128
 129     >>> normalize_feed_url('fb:43FPodcast')
 130     'http://feeds2.feedburner.com/43FPodcast'
 131     """
 132     if not url or len(url) < 8:
 133         return None
 134
 135     # This is a list of prefixes that you can use to minimize the amount of
 136     # keystrokes that you have to use.
 137     # Feel free to suggest other useful prefixes, and I'll add them here.
 138     PREFIXES = {
 139             'fb:': 'http://feeds2.feedburner.com/%s',
 140             'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
 141             'sc:': 'http://soundcloud.com/%s',
 142     }
 143
 144     for prefix, expansion in PREFIXES.iteritems():
 145         if url.startswith(prefix):
 146             url = expansion % (url[len(prefix):],)
 147             break
 148
 149     # Assume HTTP for URLs without scheme
 150     if not '://' in url:
 151         url = 'http://' + url
 152
 153     # The scheme of the URL should be all-lowercase
 154     (scheme, rest) = url.split('://', 1)
 155     scheme = scheme.lower()
 156
 157     # Remember to parse iTunes XML for itms:// URLs
 158     do_parse_itunes_xml = (scheme == 'itms')
 159
 160     # feed://, itpc:// and itms:// are really http://
 161     if scheme in ('feed', 'itpc', 'itms'):
 162         scheme = 'http'
 163
 164     # Re-assemble our URL
 165     url = scheme + '://' + rest
 166
 167     # If we had an itms:// URL, parse XML
 168     if do_parse_itunes_xml:
 169         url = parse_itunes_xml(url)
 170
 171     # Links to "phobos.apple.com"
 172     url = itunes_discover_rss(url)
 173
 174     if scheme in ('http', 'https', 'ftp'):
 175         return url
 176
 177     return None
 178
 179
 180 def username_password_from_url(url):
 181     r"""
 182     Returns a tuple (username,password) containing authentication
 183     data from the specified URL or (None,None) if no authentication
 184     data can be found in the URL.
 185
 186     See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)
 187
 188     >>> username_password_from_url('https://@host.com/')
 189     ('', None)
 190     >>> username_password_from_url('telnet://host.com/')
 191     (None, None)
 192     >>> username_password_from_url('ftp://foo:@host.com/')
 193     ('foo', '')
 194     >>> username_password_from_url('http://a:b@host.com/')
 195     ('a', 'b')
 196     >>> username_password_from_url(1)
 197     Traceback (most recent call last):
 198       ...
 199     ValueError: URL has to be a string or unicode object.
 200     >>> username_password_from_url(None)
 201     Traceback (most recent call last):
 202       ...
 203     ValueError: URL has to be a string or unicode object.
 204     >>> username_password_from_url('http://a@b:c@host.com/')
 205     Traceback (most recent call last):
 206       ...
 207     ValueError: "@" must be encoded for username/password (RFC1738).
 208     >>> username_password_from_url('ftp://a:b:c@host.com/')
 209     Traceback (most recent call last):
 210       ...
 211     ValueError: ":" must be encoded for username/password (RFC1738).
 212     >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
 213     ('i/o', 'P@ss:')
 214     >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
 215     ('\xc3\xb6sterreich', None)
 216     """
 217     if type(url) not in (str, unicode):
 218         raise ValueError('URL has to be a string or unicode object.')
 219
 220     (username, password) = (None, None)
 221
 222     (scheme, netloc, path, params, query, fragment) = urlparse.urlparse(url)
 223
 224     if '@' in netloc:
 225         (authentication, netloc) = netloc.rsplit('@', 1)
 226         if ':' in authentication:
 227             (username, password) = authentication.split(':', 1)
 228             # RFC1738 dictates that we should not allow these unquoted
 229             # characters in the username and password field (Section 3.1).
 230             for c in (':', '@', '/'):
 231                 if c in username or c in password:
 232                     raise ValueError('"%c" must be encoded for username/password (RFC1738).' % c)
 233             username = urllib.unquote(username)
 234             password = urllib.unquote(password)
 235         else:
 236             username = urllib.unquote(authentication)
 237
 238     return (username, password)
 239
 240
 241 def directory_is_writable( path):
 242     """
 243     Returns True if the specified directory exists and is writable
 244     by the current user.
 245     """
 246     return os.path.isdir( path) and os.access( path, os.W_OK)
 247
 248
 249 def calculate_size( path):
 250     """
 251     Tries to calculate the size of a directory, including any
 252     subdirectories found. The returned value might not be
 253     correct if the user doesn't have appropriate permissions
 254     to list all subdirectories of the given path.
 255     """
 256     if path is None:
 257         return 0L
 258
 259     if os.path.dirname( path) == '/':
 260         return 0L
 261
 262     if os.path.isfile( path):
 263         return os.path.getsize( path)
 264
 265     if os.path.isdir( path) and not os.path.islink( path):
 266         sum = os.path.getsize( path)
 267
 268         try:
 269             for item in os.listdir(path):
 270                 try:
 271                     sum += calculate_size(os.path.join(path, item))
 272                 except:
 273                     log('Cannot get size for %s', path)
 274         except:
 275             log('Cannot access: %s', path)
 276
 277         return sum
 278
 279     return 0L
 280
 281
 282 def file_modification_datetime(filename):
 283     """
 284     Returns the modification date of the specified file
 285     as a datetime.datetime object or None if the modification
 286     date cannot be determined.
 287     """
 288     if filename is None:
 289         return None
 290
 291     if not os.access(filename, os.R_OK):
 292         return None
 293
 294     try:
 295         s = os.stat(filename)
 296         timestamp = s[stat.ST_MTIME]
 297         return datetime.datetime.fromtimestamp(timestamp)
 298     except:
 299         log('Cannot get modification timestamp for %s', filename)
 300         return None
 301
 302
 303 def file_modification_timestamp(filename):
 304     """
 305     Returns the modification date of the specified file as a number
 306     or -1 if the modification date cannot be determined.
 307     """
 308     if filename is None:
 309         return -1
 310     try:
 311         s = os.stat(filename)
 312         return s[stat.ST_MTIME]
 313     except:
 314         log('Cannot get modification timestamp for %s', filename)
 315         return -1
 316
 317
 318 def file_age_in_days(filename):
 319     """
 320     Returns the age of the specified filename in days or
 321     zero if the modification date cannot be determined.
 322     """
 323     dt = file_modification_datetime(filename)
 324     if dt is None:
 325         return 0
 326     else:
 327         return (datetime.datetime.now()-dt).days
 328
 329
 330 def file_age_to_string(days):
 331     """
 332     Converts a "number of days" value to a string that
 333     can be used in the UI to display the file age.
 334
 335     >>> file_age_to_string(0)
 336     ''
 337     >>> file_age_to_string(1)
 338     u'1 day ago'
 339     >>> file_age_to_string(2)
 340     u'2 days ago'
 341     """
 342     if days < 1:
 343         return ''
 344     else:
 345         return N_('%d day ago', '%d days ago', days) % days
 346
 347
 348 def get_free_disk_space_win32(path):
 349     """
 350     Win32-specific code to determine the free disk space remaining
 351     for a given path. Uses code from:
 352
 353     http://mail.python.org/pipermail/python-list/2003-May/203223.html
 354     """
 355
 356     drive, tail = os.path.splitdrive(path)
 357
 358     try:
 359         import win32file
 360         userFree, userTotal, freeOnDisk = win32file.GetDiskFreeSpaceEx(drive)
 361         return userFree
 362     except ImportError:
 363         log('Warning: Running on Win32 but win32api/win32file not installed.')
 364
 365     # Cannot determine free disk space
 366     return 0
 367
 368
 369 def get_free_disk_space(path):
 370     """
 371     Calculates the free disk space available to the current user
 372     on the file system that contains the given path.
 373
 374     If the path (or its parent folder) does not yet exist, this
 375     function returns zero.
 376     """
 377
 378     if not os.path.exists(path):
 379         return 0
 380
 381     if gpodder.win32:
 382         return get_free_disk_space_win32(path)
 383
 384     s = os.statvfs(path)
 385
 386     return s.f_bavail * s.f_bsize
 387
 388
 389 def format_date(timestamp):
 390     """
 391     Converts a UNIX timestamp to a date representation. This
 392     function returns "Today", "Yesterday", a weekday name or
 393     the date in %x format, which (according to the Python docs)
 394     is the "Locale's appropriate date representation".
 395
 396     Returns None if there has been an error converting the
 397     timestamp to a string representation.
 398     """
 399     if timestamp is None:
 400         return None
 401
 402     seconds_in_a_day = 60*60*24
 403
 404     today = time.localtime()[:3]
 405     yesterday = time.localtime(time.time() - seconds_in_a_day)[:3]
 406     try:
 407         timestamp_date = time.localtime(timestamp)[:3]
 408     except ValueError, ve:
 409         log('Warning: Cannot convert timestamp', traceback=True)
 410         return None
 411
 412     if timestamp_date == today:
 413        return _('Today')
 414     elif timestamp_date == yesterday:
 415        return _('Yesterday')
 416
 417     try:
 418         diff = int( (time.time() - timestamp)/seconds_in_a_day )
 419     except:
 420         log('Warning: Cannot convert "%s" to date.', timestamp, traceback=True)
 421         return None
 422
 423     try:
 424         timestamp = datetime.datetime.fromtimestamp(timestamp)
 425     except:
 426         return None
 427
 428     if diff < 7:
 429         # Weekday name
 430         return str(timestamp.strftime('%A'))
 431     else:
 432         # Locale's appropriate date representation
 433         return str(timestamp.strftime('%x'))
 434
 435
 436 def format_filesize(bytesize, use_si_units=False, digits=2):
 437     """
 438     Formats the given size in bytes to be human-readable,
 439
 440     Returns a localized "(unknown)" string when the bytesize
 441     has a negative value.
 442     """
 443     si_units = (
 444             ( 'kB', 10**3 ),
 445             ( 'MB', 10**6 ),
 446             ( 'GB', 10**9 ),
 447     )
 448
 449     binary_units = (
 450             ( 'KiB', 2**10 ),
 451             ( 'MiB', 2**20 ),
 452             ( 'GiB', 2**30 ),
 453     )
 454
 455     try:
 456         bytesize = float( bytesize)
 457     except:
 458         return _('(unknown)')
 459
 460     if bytesize < 0:
 461         return _('(unknown)')
 462
 463     if use_si_units:
 464         units = si_units
 465     else:
 466         units = binary_units
 467
 468     ( used_unit, used_value ) = ( 'B', bytesize )
 469
 470     for ( unit, value ) in units:
 471         if bytesize >= value:
 472             used_value = bytesize / float(value)
 473             used_unit = unit
 474
 475     return ('%.'+str(digits)+'f %s') % (used_value, used_unit)
 476
 477
 478 def delete_file( path):
 479     """
 480     Tries to delete the given filename and silently
 481     ignores deletion errors (if the file doesn't exist).
 482     Also deletes extracted cover files if they exist.
 483     """
 484     log( 'Trying to delete: %s', path)
 485     try:
 486         os.unlink( path)
 487         # Remove any extracted cover art that might exist
 488         for cover_file in glob.glob( '%s.cover.*' % ( path, )):
 489             os.unlink( cover_file)
 490
 491     except:
 492         pass
 493
 494
 495
 496 def remove_html_tags(html):
 497     """
 498     Remove HTML tags from a string and replace numeric and
 499     named entities with the corresponding character, so the
 500     HTML text can be displayed in a simple text view.
 501     """
 502     # If we would want more speed, we could make these global
 503     re_strip_tags = re.compile('<[^>]*>')
 504     re_unicode_entities = re.compile('&#(\d{2,4});')
 505     re_html_entities = re.compile('&(.{2,8});')
 506     re_newline_tags = re.compile('(<br[^>]*>|<[/]?ul[^>]*>|</li>)', re.I)
 507     re_listing_tags = re.compile('<li[^>]*>', re.I)
 508
 509     result = html
 510
 511     # Convert common HTML elements to their text equivalent
 512     result = re_newline_tags.sub('\n', result)
 513     result = re_listing_tags.sub('\n * ', result)
 514     result = re.sub('<[Pp]>', '\n\n', result)
 515
 516     # Remove all HTML/XML tags from the string
 517     result = re_strip_tags.sub('', result)
 518
 519     # Convert numeric XML entities to their unicode character
 520     result = re_unicode_entities.sub(lambda x: unichr(int(x.group(1))), result)
 521
 522     # Convert named HTML entities to their unicode character
 523     result = re_html_entities.sub(lambda x: unicode(entitydefs.get(x.group(1),''), 'iso-8859-1'), result)
 524
 525     # Convert more than two newlines to two newlines
 526     result = re.sub('([\r\n]{2})([\r\n])+', '\\1', result)
 527
 528     return result.strip()
 529
 530
 531 def extension_from_mimetype(mimetype):
 532     """
 533     Simply guesses what the file extension should be from the mimetype
 534     """
 535     return mimetypes.guess_extension(mimetype) or ''
 536
 537
 538 def extension_correct_for_mimetype(extension, mimetype):
 539     """
 540     Check if the given filename extension (e.g. ".ogg") is a possible
 541     extension for a given mimetype (e.g. "application/ogg") and return
 542     a boolean value (True if it's possible, False if not). Also do
 543
 544     >>> extension_correct_for_mimetype('.ogg', 'application/ogg')
 545     True
 546     >>> extension_correct_for_mimetype('.ogv', 'video/ogg')
 547     True
 548     >>> extension_correct_for_mimetype('.ogg', 'audio/mpeg')
 549     False
 550     >>> extension_correct_for_mimetype('mp3', 'audio/mpeg')
 551     Traceback (most recent call last):
 552       ...
 553     ValueError: "mp3" is not an extension (missing .)
 554     >>> extension_correct_for_mimetype('.mp3', 'audio mpeg')
 555     Traceback (most recent call last):
 556       ...
 557     ValueError: "audio mpeg" is not a mimetype (missing /)
 558     """
 559     if not '/' in mimetype:
 560         raise ValueError('"%s" is not a mimetype (missing /)' % mimetype)
 561     if not extension.startswith('.'):
 562         raise ValueError('"%s" is not an extension (missing .)' % extension)
 563
 564     # Create a "default" extension from the mimetype, e.g. "application/ogg"
 565     # becomes ".ogg", "audio/mpeg" becomes ".mpeg", etc...
 566     default = ['.'+mimetype.split('/')[-1]]
 567
 568     return extension in default+mimetypes.guess_all_extensions(mimetype)
 569
 570
 571 def filename_from_url(url):
 572     """
 573     Extracts the filename and (lowercase) extension (with dot)
 574     from a URL, e.g. http://server.com/file.MP3?download=yes
 575     will result in the string ("file", ".mp3") being returned.
 576
 577     This function will also try to best-guess the "real"
 578     extension for a media file (audio, video) by
 579     trying to match an extension to these types and recurse
 580     into the query string to find better matches, if the
 581     original extension does not resolve to a known type.
 582
 583     http://my.net/redirect.php?my.net/file.ogg => ("file", ".ogg")
 584     http://server/get.jsp?file=/episode0815.MOV => ("episode0815", ".mov")
 585     http://s/redirect.mp4?http://serv2/test.mp4 => ("test", ".mp4")
 586     """
 587     (scheme, netloc, path, para, query, fragid) = urlparse.urlparse(url)
 588     (filename, extension) = os.path.splitext(os.path.basename( urllib.unquote(path)))
 589
 590     if file_type_by_extension(extension) is not None and not \
 591         query.startswith(scheme+'://'):
 592         # We have found a valid extension (audio, video)
 593         # and the query string doesn't look like a URL
 594         return ( filename, extension.lower() )
 595
 596     # If the query string looks like a possible URL, try that first
 597     if len(query.strip()) > 0 and query.find('/') != -1:
 598         query_url = '://'.join((scheme, urllib.unquote(query)))
 599         (query_filename, query_extension) = filename_from_url(query_url)
 600
 601         if file_type_by_extension(query_extension) is not None:
 602             return os.path.splitext(os.path.basename(query_url))
 603
 604     # No exact match found, simply return the original filename & extension
 605     return ( filename, extension.lower() )
 606
 607
 608 def file_type_by_extension(extension):
 609     """
 610     Tries to guess the file type by looking up the filename
 611     extension from a table of known file types. Will return
 612     "audio", "video" or None.
 613
 614     >>> file_type_by_extension('.aif')
 615     'audio'
 616     >>> file_type_by_extension('.3GP')
 617     'video'
 618     >>> file_type_by_extension('.txt') is None
 619     True
 620     >>> file_type_by_extension(None) is None
 621     True
 622     >>> file_type_by_extension('ogg')
 623     Traceback (most recent call last):
 624       ...
 625     ValueError: Extension does not start with a dot: ogg
 626     """
 627     if not extension:
 628         return None
 629
 630     if not extension.startswith('.'):
 631         raise ValueError('Extension does not start with a dot: %s' % extension)
 632
 633     global _BUILTIN_FILE_TYPES
 634     if _BUILTIN_FILE_TYPES is None:
 635         # List all types that are not in the default mimetypes.types_map
 636         # (even if they might be detected by mimetypes.guess_type)
 637         # For OGG, see http://wiki.xiph.org/MIME_Types_and_File_Extensions
 638         audio_types = ('.ogg', '.oga', '.spx', '.flac', '.axa', \
 639                        '.aac', '.m4a', '.m4b', '.wma')
 640         video_types = ('.ogv', '.axv', '.mp4', \
 641                        '.mkv', '.m4v', '.divx', '.flv', '.wmv', '.3gp')
 642         _BUILTIN_FILE_TYPES = {}
 643         _BUILTIN_FILE_TYPES.update((ext, 'audio') for ext in audio_types)
 644         _BUILTIN_FILE_TYPES.update((ext, 'video') for ext in video_types)
 645
 646     extension = extension.lower()
 647
 648     if extension in _BUILTIN_FILE_TYPES:
 649         return _BUILTIN_FILE_TYPES[extension]
 650
 651     # Need to prepend something to the extension, so guess_type works
 652     type, encoding = mimetypes.guess_type('file'+extension)
 653
 654     if type is not None and '/' in type:
 655         filetype, rest = type.split('/', 1)
 656         if filetype in ('audio', 'video'):
 657             return filetype
 658
 659     return None
 660
 661
 662 def get_first_line( s):
 663     """
 664     Returns only the first line of a string, stripped so
 665     that it doesn't have whitespace before or after.
 666     """
 667     return s.strip().split('\n')[0].strip()
 668
 669
 670 def object_string_formatter( s, **kwargs):
 671     """
 672     Makes attributes of object passed in as keyword
 673     arguments available as {OBJECTNAME.ATTRNAME} in
 674     the passed-in string and returns a string with
 675     the above arguments replaced with the attribute
 676     values of the corresponding object.
 677
 678     Example:
 679
 680     e = Episode()
 681     e.title = 'Hello'
 682     s = '{episode.title} World'
 683
 684     print object_string_formatter( s, episode = e)
 685           => 'Hello World'
 686     """
 687     result = s
 688     for ( key, o ) in kwargs.items():
 689         matches = re.findall( r'\{%s\.([^\}]+)\}' % key, s)
 690         for attr in matches:
 691             if hasattr( o, attr):
 692                 try:
 693                     from_s = '{%s.%s}' % ( key, attr )
 694                     to_s = getattr( o, attr)
 695                     result = result.replace( from_s, to_s)
 696                 except:
 697                     log( 'Could not replace attribute "%s" in string "%s".', attr, s)
 698
 699     return result
 700
 701
 702 def format_desktop_command(command, filenames):
 703     """
 704     Formats a command template from the "Exec=" line of a .desktop
 705     file to a string that can be invoked in a shell.
 706
 707     Handled format strings: %U, %u, %F, %f and a fallback that
 708     appends the filename as first parameter of the command.
 709
 710     See http://standards.freedesktop.org/desktop-entry-spec/1.0/ar01s06.html
 711
 712     Returns a list of commands to execute, either one for
 713     each filename if the application does not support multiple
 714     file names or one for all filenames (%U, %F or unknown).
 715     """
 716     command = shlex.split(command)
 717
 718     command_before = command
 719     command_after = []
 720     multiple_arguments = True
 721     for fieldcode in ('%U', '%F', '%u', '%f'):
 722         if fieldcode in command:
 723             command_before = command[:command.index(fieldcode)]
 724             command_after = command[command.index(fieldcode)+1:]
 725             multiple_arguments = fieldcode in ('%U', '%F')
 726             break
 727
 728     if multiple_arguments:
 729         return [command_before + filenames + command_after]
 730
 731     commands = []
 732     for filename in filenames:
 733         commands.append(command_before+[filename]+command_after)
 734
 735     return commands
 736
 737 def url_strip_authentication(url):
 738     """
 739     Strips authentication data from an URL. Returns the URL with
 740     the authentication data removed from it.
 741
 742     >>> url_strip_authentication('https://host.com/')
 743     'https://host.com/'
 744     >>> url_strip_authentication('telnet://foo:bar@host.com/')
 745     'telnet://host.com/'
 746     >>> url_strip_authentication('ftp://billy@example.org')
 747     'ftp://example.org'
 748     >>> url_strip_authentication('ftp://billy:@example.org')
 749     'ftp://example.org'
 750     >>> url_strip_authentication('http://aa:bc@localhost/x')
 751     'http://localhost/x'
 752     >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
 753     'http://blubb.lan/u.html'
 754     >>> url_strip_authentication('http://c:d@x.org/')
 755     'http://x.org/'
 756     >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
 757     'http://cx.lan'
 758     """
 759     url_parts = list(urlparse.urlsplit(url))
 760     # url_parts[1] is the HOST part of the URL
 761
 762     # Remove existing authentication data
 763     if '@' in url_parts[1]:
 764         url_parts[1] = url_parts[1].split('@', 2)[1]
 765
 766     return urlparse.urlunsplit(url_parts)
 767
 768
 769 def url_add_authentication(url, username, password):
 770     """
 771     Adds authentication data (username, password) to a given
 772     URL in order to construct an authenticated URL.
 773
 774     >>> url_add_authentication('https://host.com/', '', None)
 775     'https://host.com/'
 776     >>> url_add_authentication('http://example.org/', None, None)
 777     'http://example.org/'
 778     >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
 779     'telnet://foo:bar@host.com/'
 780     >>> url_add_authentication('ftp://example.org', 'billy', None)
 781     'ftp://billy@example.org'
 782     >>> url_add_authentication('ftp://example.org', 'billy', '')
 783     'ftp://billy:@example.org'
 784     >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
 785     'http://aa:bc@localhost/x'
 786     >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
 787     'http://i%2Fo:P%40ss%3A@blubb.lan/u.html'
 788     >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
 789     'http://c:d@x.org/'
 790     >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@:', 'i/')
 791     'http://P%40%3A:i%2F@cx.lan'
 792     """
 793     if username is None or username == '':
 794         return url
 795
 796     username = urllib.quote_plus(username)
 797
 798     if password is not None:
 799         password = urllib.quote_plus(password)
 800         auth_string = ':'.join((username, password))
 801     else:
 802         auth_string = username
 803
 804     url = url_strip_authentication(url)
 805
 806     url_parts = list(urlparse.urlsplit(url))
 807     # url_parts[1] is the HOST part of the URL
 808     url_parts[1] = '@'.join((auth_string, url_parts[1]))
 809
 810     return urlparse.urlunsplit(url_parts)
 811
 812
 813 def get_real_url(url):
 814     """
 815     Gets the real URL of a file and resolves all redirects.
 816     """
 817     try:
 818         username, password = username_password_from_url(url)
 819         if username or password:
 820             url = url_strip_authentication(url)
 821             log('url=%s, username=%s, password=%s', url, username, password)
 822             password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
 823             password_mgr.add_password(None, url, username, password)
 824             handler = urllib2.HTTPBasicAuthHandler(password_mgr)
 825             opener = urllib2.build_opener(handler)
 826             return opener.open(url).geturl()
 827         else:
 828             return urlopen(url).geturl()
 829     except:
 830         log('Error getting real url for %s', url, traceback=True)
 831         return url
 832
 833 def urlopen(url):
 834     """
 835     An URL opener with the User-agent set to gPodder (with version)
 836     """
 837     headers = {'User-agent': gpodder.user_agent}
 838     request = urllib2.Request(url, headers=headers)
 839     return urllib2.urlopen(request)
 840
 841 def find_command( command):
 842     """
 843     Searches the system's PATH for a specific command that is
 844     executable by the user. Returns the first occurence of an
 845     executable binary in the PATH, or None if the command is
 846     not available.
 847     """
 848
 849     if 'PATH' not in os.environ:
 850         return None
 851
 852     for path in os.environ['PATH'].split( os.pathsep):
 853         command_file = os.path.join( path, command)
 854         if os.path.isfile( command_file) and os.access( command_file, os.X_OK):
 855             return command_file
 856
 857     return None
 858
 859
def parse_itunes_xml(url):
    """
    Parses an XML document in the "url" parameter (this has to be
    a itms:// or http:// URL to a XML doc) and searches all "<dict>"
    elements for the first occurrence of a "<key>feedURL</key>"
    element and then continues the search for the string value of
    this key.

    This returns the RSS feed URL for Apple iTunes Podcast XML
    documents that are retrieved by itunes_discover_rss().

    Returns None if the document cannot be parsed or if no
    "feedURL" key with a string value is found.
    """
    # The document is always fetched via HTTP
    url = url.replace('itms://', 'http://')
    doc = http_get_and_gunzip(url)
    try:
        d = xml.dom.minidom.parseString(doc)
    except Exception, e:
        log('Error parsing document from itms:// URL: %s', e)
        return None
    # Text of the most recently seen <key> element. It is not reset
    # between <dict> elements, so it carries over from one to the next.
    last_key = None
    for pairs in d.getElementsByTagName('dict'):
        for node in pairs.childNodes:
            # Skip text nodes, comments, etc. between the elements
            if node.nodeType != node.ELEMENT_NODE:
                continue

            if node.tagName == 'key' and node.childNodes.length > 0:
                if node.firstChild.nodeType == node.TEXT_NODE:
                    last_key = node.firstChild.data

            # Only elements following the "feedURL" key are of interest
            if last_key != 'feedURL':
                continue

            # The first non-empty <string> after the key is its value
            if node.tagName == 'string' and node.childNodes.length > 0:
                if node.firstChild.nodeType == node.TEXT_NODE:
                    return node.firstChild.data

    return None
 896
 897
 898 def http_get_and_gunzip(uri):
 899     """
 900     Does a HTTP GET request and tells the server that we accept
 901     gzip-encoded data. This is necessary, because the Apple iTunes
 902     server will always return gzip-encoded data, regardless of what
 903     we really request.
 904
 905     Returns the uncompressed document at the given URI.
 906     """
 907     request = urllib2.Request(uri)
 908     request.add_header("Accept-encoding", "gzip")
 909     usock = urllib2.urlopen(request)
 910     data = usock.read()
 911     if usock.headers.get('content-encoding', None) == 'gzip':
 912         data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
 913     return data
 914
 915
 916 def itunes_discover_rss(url):
 917     """
 918     Takes an iTunes-specific podcast URL and turns it
 919     into a "normal" RSS feed URL. If the given URL is
 920     not a phobos.apple.com URL, we will simply return
 921     the URL and assume it's already an RSS feed URL.
 922
 923     Idea from Andrew Clarke's itunes-url-decoder.py
 924     """
 925
 926     if url is None:
 927         return url
 928
 929     if not 'phobos.apple.com' in url.lower():
 930         # This doesn't look like an iTunes URL
 931         return url
 932
 933     try:
 934         data = http_get_and_gunzip(url)
 935         (url,) = re.findall("itmsOpen\('([^']*)", data)
 936         return parse_itunes_xml(url)
 937     except:
 938         return None
 939
 940
 941 def idle_add(func, *args):
 942     """
 943     This is a wrapper function that does the Right
 944     Thing depending on if we are running a GTK+ GUI or
 945     not. If not, we're simply calling the function.
 946
 947     If we are a GUI app, we use gobject.idle_add() to
 948     call the function later - this is needed for
 949     threads to be able to modify GTK+ widget data.
 950     """
 951     if gpodder.ui.desktop or gpodder.ui.maemo:
 952         import gobject
 953         def x(f, *a):
 954             f(*a)
 955             return False
 956
 957         gobject.idle_add(func, *args)
 958     else:
 959         func(*args)
 960
 961
 962 def bluetooth_available():
 963     """
 964     Returns True or False depending on the availability
 965     of bluetooth functionality on the system.
 966     """
 967     if find_command('bluetooth-sendto') or \
 968             find_command('gnome-obex-send'):
 969         return True
 970     else:
 971         return False
 972
 973
 974 def bluetooth_send_file(filename):
 975     """
 976     Sends a file via bluetooth.
 977
 978     This function tries to use "bluetooth-sendto", and if
 979     it is not available, it also tries "gnome-obex-send".
 980     """
 981     command_line = None
 982
 983     if find_command('bluetooth-sendto'):
 984         command_line = ['bluetooth-sendto']
 985     elif find_command('gnome-obex-send'):
 986         command_line = ['gnome-obex-send']
 987
 988     if command_line is not None:
 989         command_line.append(filename)
 990         return (subprocess.Popen(command_line).wait() == 0)
 991     else:
 992         log('Cannot send file. Please install "bluetooth-sendto" or "gnome-obex-send".')
 993         return False
 994
 995
 996 def format_seconds_to_hour_min_sec(seconds):
 997     """
 998     Take the number of seconds and format it into a
 999     human-readable string (duration).
1000
1001     >>> format_seconds_to_hour_min_sec(3834)
1002     u'1 hour, 3 minutes and 54 seconds'
1003     >>> format_seconds_to_hour_min_sec(3600)
1004     u'1 hour'
1005     >>> format_seconds_to_hour_min_sec(62)
1006     u'1 minute and 2 seconds'
1007     """
1008
1009     if seconds < 1:
1010         return N_('%d second', '%d seconds', seconds) % seconds
1011
1012     result = []
1013
1014     hours = seconds/3600
1015     seconds = seconds%3600
1016
1017     minutes = seconds/60
1018     seconds = seconds%60
1019
1020     if hours:
1021         result.append(N_('%d hour', '%d hours', hours) % hours)
1022
1023     if minutes:
1024         result.append(N_('%d minute', '%d minutes', minutes) % minutes)
1025
1026     if seconds:
1027         result.append(N_('%d second', '%d seconds', seconds) % seconds)
1028
1029     if len(result) > 1:
1030         return (' '+_('and')+' ').join((', '.join(result[:-1]), result[-1]))
1031     else:
1032         return result[0]
1033
def http_request(url, method='HEAD'):
    """
    Sends a request with the given method (HEAD by default)
    for "url" and returns the httplib response object.

    https:// URLs are requested through a HTTPS connection,
    all other URLs through a plain HTTP connection. Connection
    errors are passed on to the caller.
    """
    parsed = urlparse.urlparse(url)
    scheme, netloc = parsed[0], parsed[1]

    if scheme == 'https':
        conn = httplib.HTTPSConnection(netloc)
    else:
        conn = httplib.HTTPConnection(netloc)

    # Everything following the host part is the request target;
    # it is taken from the URL as-is to avoid re-encoding it
    start = len(scheme) + len('://') + len(netloc)
    # The fragment is only meaningful to the client, so drop it
    target = url[start:].split('#', 1)[0]
    if not target.startswith('/'):
        # "http://host" and "http://host?query" have no path
        target = '/' + target

    conn.request(method, target)
    return conn.getresponse()
1040
def get_episode_info_from_url(url):
    """
    Try to get information about a podcast episode by sending
    a HEAD request to the HTTP server and parsing the result.

    The return value is a dict containing all fields that
    could be parsed from the URL. This currently contains:

      "length": The size of the file in bytes
      "pubdate": The unix timestamp for the pubdate

    If there is an error, this function returns {}. This will
    only function with http:// and https:// URLs. Headers that
    are missing or cannot be converted are left out of the dict.
    """
    if not (url.startswith('http://') or url.startswith('https://')):
        return {}

    log('Trying to get metainfo for %s', url)

    try:
        r = http_request(url)
    except Exception:
        # Keep the promise made in the docstring: a failing
        # request results in an empty dict, not in an exception
        log('Error requesting metainfo for %s', url, traceback=True)
        return {}

    result = {}

    if 'content-length' in r.msg:
        try:
            result['length'] = int(r.msg['content-length'])
        except ValueError:
            log('Error converting content-length header.')

    if 'last-modified' in r.msg:
        try:
            parsed_date = feedparser._parse_date(r.msg['last-modified'])
            result['pubdate'] = time.mktime(parsed_date)
        except Exception:
            log('Error converting last-modified header.')

    return result
1079
1080
def gui_open(filename):
    """
    Open a file or folder with the default application set
    by the Desktop environment. This uses "xdg-open" on all
    systems with a few exceptions:

       on Win32, os.startfile() is used
       on Maemo, osso is used to communicate with Nokia Media Player

    Returns True if the request to open the file could be
    issued and False if there was an error (which is logged).
    """
    try:
        if gpodder.ui.maemo:
            try:
                import osso
            except ImportError:
                log('Cannot import osso module on maemo.')
                return False

            log('Using Nokia Media Player to open %s', filename)
            context = osso.Context('gPodder', gpodder.__version__, False)
            filename = filename.encode('utf-8')
            rpc = osso.Rpc(context)
            service, path = 'com.nokia.mediaplayer', '/com/nokia/mediaplayer'
            rpc.rpc_run(service, path, service, 'mime_open', (filename,))
        elif gpodder.win32:
            os.startfile(filename)
        else:
            # We do not wait for xdg-open to finish here
            subprocess.Popen(['xdg-open', filename])
        return True
    except Exception:
        # KeyboardInterrupt and SystemExit are not caught on purpose
        log('Cannot open file/folder: "%s"', filename, traceback=True)
        return False
1112
1113
def open_website(url):
    """
    Opens the specified URL in a web browser.

    On Maemo, the osso browser is asked to open the URL in
    a new window. On all other systems, Python's "webbrowser"
    module is used (from a separate thread), so make sure your
    system is set up correctly.
    """
    if not gpodder.ui.maemo:
        browser_thread = threading.Thread(target=webbrowser.open, args=(url,))
        browser_thread.start()
        return

    import osso
    context = osso.Context('gPodder', gpodder.__version__, False)
    browser = osso.Rpc(context)
    browser.rpc_run_with_defaults('osso_browser', 'open_new_window', (url,))
1129
def sanitize_encoding(filename):
    r"""
    Returns the given string as a byte string in the
    detected native language encoding. Characters that
    are invalid in this encoding are dropped.

    >>> sanitize_encoding('\x80')
    ''
    >>> sanitize_encoding(u'unicode')
    'unicode'
    """
    if isinstance(filename, unicode):
        text = filename
    else:
        # Byte strings are decoded first to get rid of invalid bytes
        text = filename.decode(encoding, 'ignore')

    return text.encode(encoding, 'ignore')
1145
1146
def sanitize_filename(filename, max_length=0, use_ascii=False):
    """
    Returns a version of "filename" that can be written to
    disk: The name is cut off after max_length characters
    (0 = no limit), surrounding whitespace is stripped, the
    result is encoded in the native language encoding and
    the characters / | ? * < > : + [ ] " and \\ are replaced
    with underscores.

    If use_ascii is True, the result is encoded as ASCII
    instead of the native language encoding; characters
    that cannot be encoded are dropped.
    """
    target_encoding = encoding
    if use_ascii:
        target_encoding = 'ascii'

    # Byte strings are always interpreted in the native encoding
    if not isinstance(filename, unicode):
        filename = filename.decode(encoding, 'ignore')

    too_long = max_length > 0 and len(filename) > max_length
    if too_long:
        log('Limiting file/folder name "%s" to %d characters.', filename, max_length)
        filename = filename[:max_length]

    encoded = filename.strip().encode(target_encoding, 'ignore')
    return re.sub('[/|?*<>:+\[\]\"\\\]', '_', encoded)
1171
1172
def find_mount_point(directory):
    """
    Determine the mount point a directory resides on.

    The directory is made absolute and then checked with
    os.path.ismount(). As long as this check fails, the
    last path component is removed and the check is done
    again. When "/" is reached, it is returned without
    any further check.

    Only str objects are accepted as directory names, a
    ValueError is raised for everything else.

    >>> find_mount_point('/')
    '/'

    >>> find_mount_point(u'/something')
    Traceback (most recent call last):
      ...
    ValueError: Convert unicode objects to str first.

    >>> find_mount_point(None)
    Traceback (most recent call last):
      ...
    ValueError: Directory names should be of type str.

    >>> find_mount_point(42)
    Traceback (most recent call last):
      ...
    ValueError: Directory names should be of type str.

    >>> from minimock import mock, restore
    >>> mocked_mntpoints = ('/', '/home', '/media/usbdisk', '/media/cdrom')
    >>> mock('os.path.ismount', returns_func=lambda x: x in mocked_mntpoints)
    >>>
    >>> # For mocking os.getcwd(), we simply use a lambda to avoid the
    >>> # massive output of "Called os.getcwd()" lines in this doctest
    >>> os.getcwd = lambda: '/home/thp'
    >>>
    >>> find_mount_point('.')
    Called os.path.ismount('/home/thp')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('relativity')
    Called os.path.ismount('/home/thp/relativity')
    Called os.path.ismount('/home/thp')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('/media/usbdisk/')
    Called os.path.ismount('/media/usbdisk')
    '/media/usbdisk'
    >>> find_mount_point('/home/thp/Desktop')
    Called os.path.ismount('/home/thp/Desktop')
    Called os.path.ismount('/home/thp')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('/media/usbdisk/Podcasts/With Spaces')
    Called os.path.ismount('/media/usbdisk/Podcasts/With Spaces')
    Called os.path.ismount('/media/usbdisk/Podcasts')
    Called os.path.ismount('/media/usbdisk')
    '/media/usbdisk'
    >>> find_mount_point('/home/')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('/media/cdrom/../usbdisk/blubb//')
    Called os.path.ismount('/media/usbdisk/blubb')
    Called os.path.ismount('/media/usbdisk')
    '/media/usbdisk'
    >>> restore()
    """
    # Unicode objects are rejected with a message of their own:
    # Converting them to some native encoding could fail, so we
    # fail loudly and let the caller pick the proper encoding.
    if isinstance(directory, unicode):
        raise ValueError('Convert unicode objects to str first.')

    if not isinstance(directory, str):
        raise ValueError('Directory names should be of type str.')

    candidate = os.path.abspath(directory)

    # Walk up towards the root until a mount point is found; the
    # root itself is never passed to os.path.ismount()
    while candidate != '/' and not os.path.ismount(candidate):
        candidate = os.path.dirname(candidate)

    return candidate
1256
1257
# Matches a leading protocol part, e.g. http://, ftp:// or ldap://
protocolPattern = re.compile(r'^\w+://')

def isabs(string):
    """
    Checks if "string" is an absolute path or a protocol
    address. Addresses beginning with a protocol part such
    as http://, ftp:// or ldap:// are considered to be
    "absolute" paths.

    @return 1 for protocol addresses, and the result of
    os.path.isabs() for everything else
    Source: http://code.activestate.com/recipes/208993/
    """
    if protocolPattern.match(string) is not None:
        return 1

    return os.path.isabs(string)
1270
def rel2abs(path, base = os.curdir):
    """ Converts a relative path to an absolute path.

    @param path the path to convert - if it is already absolute
    (as determined by isabs(), so this includes protocol
    addresses), it is returned without conversion.
    @param base - optional. Defaults to the current directory.
    The relative path is joined onto the base.
    @return the absolute path for path, relative to base
    Source: http://code.activestate.com/recipes/208993/
    """
    if not isabs(path):
        path = os.path.abspath(os.path.join(base, path))

    return path
1284
def commonpath(l1, l2, common=None):
    """
    Helper function for relpath: Splits off the leading
    elements that two sequences of path components share.

    @param l1 the first list of path components
    @param l2 the second list of path components
    @param common - optional. A list of components that is put
    in front of the shared components in the result.
    @return a tuple (common, rest1, rest2): The list of shared
    leading components, and what is left of l1 and l2 after it.
    The list of shared components is always a new list.
    Source: http://code.activestate.com/recipes/208993/
    """
    # Never hand out a shared default list (or the list passed in
    # by the caller), as the caller might modify the result
    prefix = []
    if common is not None:
        prefix.extend(common)

    # Count the leading components that are equal in both lists
    shared = 0
    for first, second in zip(l1, l2):
        if first != second:
            break
        shared += 1

    prefix.extend(l1[:shared])
    return (prefix, l1[shared:], l2[shared:])
1294
def relpath(p1, p2):
    """
    Finds relative path from p1 to p2

    Both paths are split at os.path.sep and compared component
    by component (see commonpath). For every component of p1
    that is not shared with p2, the result goes up one level.

    @return the relative path, or "." if both paths are equal
    Source: http://code.activestate.com/recipes/208993/
    """
    pathsplit = lambda s: s.split(os.path.sep)

    (common,l1,l2) = commonpath(pathsplit(p1), pathsplit(p2))
    p = []
    if len(l1) > 0:
        # One "../" for every component that is only in p1
        p = [ ('..'+os.sep) * len(l1) ]
    p = p + l2
    # Test for emptiness by value; "len(p) is 0" compared object
    # identity, which only works by accident for small integers
    if not p:
        return "."

    return os.path.join(*p)
1311
1312
def run_external_command(command_line):
    """
    Runs an external command (specified by command_line)
    through the shell in a separate thread, so the caller
    does not have to wait for the command to finish.

    The thread waits for the command to exit and writes the
    outcome to the log: command not found (exit status 127),
    permission denied (126), another error (any other status
    above zero) or success.

    >>> from minimock import mock, Mock, restore
    >>> mock('subprocess.Popen', returns=Mock('subprocess.Popen'))
    >>> run_external_command('testprogramm')
    Called subprocess.Popen('testprogramm', shell=True)
    Called subprocess.Popen.wait()
    >>> restore()
    """

    def run_and_report(cmd):
        log('Running external command: %s', cmd)
        exit_code = subprocess.Popen(cmd, shell=True).wait()

        if exit_code == 127:
            log('Command not found: %s', cmd)
            return

        if exit_code == 126:
            log('Command permission denied: %s', cmd)
            return

        if exit_code > 0:
            log('Command returned an error (%d): %s', exit_code, cmd)
            return

        log('Command finished successfully: %s', cmd)

    worker = threading.Thread(target=run_and_report, args=(command_line,))
    worker.start()
1345