Improve parsing support for OPML files
[gpodder.git] / src / gpodder / util.py
blob087dda2fd247d14b776a9ef3e53ffd880b46b09a
1 # -*- coding: utf-8 -*-
3 # gPodder - A media aggregator and podcast client
4 # Copyright (c) 2005-2010 Thomas Perl and the gPodder Team
6 # gPodder is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # gPodder is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 # util.py -- Misc utility functions
22 # Thomas Perl <thp@perli.net> 2007-08-04
25 """Miscellaneous helper functions for gPodder
27 This module provides helper and utility functions for gPodder that
28 are not tied to any specific part of gPodder.
30 """
32 import gpodder
33 from gpodder.liblogger import log
35 import os
36 import os.path
37 import platform
38 import glob
39 import stat
40 import shlex
42 import re
43 import subprocess
44 from htmlentitydefs import entitydefs
45 import time
46 import locale
47 import gzip
48 import datetime
49 import threading
51 import urlparse
52 import urllib
53 import urllib2
54 import httplib
55 import webbrowser
56 import mimetypes
58 import feedparser
60 import StringIO
61 import xml.dom.minidom
63 _ = gpodder.gettext
64 N_ = gpodder.ngettext
# Try to detect OS encoding (by Leonid Ponomarev)
# Start from a platform default, then let the charset suffix of $LANG
# (the part after the last dot, e.g. "UTF-8" in "en_US.UTF-8") override it.
encoding = 'utf8' if gpodder.ui.maemo else 'iso-8859-15'

if '.' in os.environ.get('LANG', ''):
    lang = os.environ['LANG']
    (language, encoding) = lang.rsplit('.', 1)
    enc = encoding
    log('Detected encoding: %s', encoding)
else:
    # Using iso-8859-15 here as (hopefully) sane default
    # see http://en.wikipedia.org/wiki/ISO/IEC_8859-1
    log('Using ISO-8859-15 as encoding. If this')
    log('is incorrect, please set your $LANG variable.')


# Lazily-built lookup table (extension -> 'audio'/'video');
# used by file_type_by_extension()
_BUILTIN_FILE_TYPES = None
def make_directory(path):
    """
    Tries to create a directory (including missing parents) if it
    does not exist already.

    Returns True if the directory exists after the function
    call, False otherwise. This function does not raise on
    creation errors; failures are logged instead.
    """
    if os.path.isdir(path):
        return True

    try:
        os.makedirs(path)
    except Exception:
        # Somebody else may have created the directory between the
        # isdir() check and makedirs(); that still counts as success.
        if os.path.isdir(path):
            return True
        log('Could not create directory: %s', path)
        return False

    return True
def normalize_feed_url(url):
    """
    Turn a user-supplied feed address into a http://, https://
    or ftp:// URL. Returns None when the input is empty, shorter
    than 8 characters, or ends up with an unsupported scheme.

    feed://, itpc:// and itms:// are rewritten to http://.
    itms:// URLs are additionally resolved through
    parse_itunes_xml(), and every URL is passed through
    itunes_discover_rss() (phobos.apple.com links).

    >>> normalize_feed_url('itpc://example.org/podcast.rss')
    'http://example.org/podcast.rss'

    If no URL scheme is defined (e.g. "curry.com"), we will
    simply assume the user intends to add a http:// feed.

    >>> normalize_feed_url('curry.com')
    'http://curry.com'

    There are even some more shortcuts for advanced users
    and lazy typists (see the source for details).

    >>> normalize_feed_url('fb:43FPodcast')
    'http://feeds2.feedburner.com/43FPodcast'
    """
    if not url or len(url) < 8:
        return None

    # Typing shortcuts: "<prefix><name>" expands to a full feed URL.
    # Feel free to suggest other useful prefixes.
    shortcuts = {
        'fb:': 'http://feeds2.feedburner.com/%s',
        'yt:': 'http://www.youtube.com/rss/user/%s/videos.rss',
        'sc:': 'http://soundcloud.com/%s',
    }

    for shortcut in shortcuts:
        if url.startswith(shortcut):
            url = shortcuts[shortcut] % (url[len(shortcut):],)
            break

    # Assume HTTP for URLs without scheme
    if '://' not in url:
        url = 'http://' + url

    # The scheme of the URL should be all-lowercase
    (scheme, _separator, remainder) = url.partition('://')
    scheme = scheme.lower()

    # Remember (before rewriting the scheme) whether this was itms://
    was_itms = (scheme == 'itms')

    # feed://, itpc:// and itms:// are really http://
    if scheme in ('feed', 'itpc', 'itms'):
        scheme = 'http'

    # Re-assemble our URL
    url = ''.join((scheme, '://', remainder))

    # itms:// URLs point to an XML document that names the real feed
    if was_itms:
        url = parse_itunes_xml(url)

    # Links to "phobos.apple.com"
    url = itunes_discover_rss(url)

    if scheme not in ('http', 'https', 'ftp'):
        return None

    return url
def username_password_from_url(url):
    r"""
    Extract the (username, password) tuple embedded in a URL's
    network location. Parts that are absent are returned as None,
    so a URL without authentication data yields (None, None).

    See Section 3.1 of RFC 1738 (http://www.ietf.org/rfc/rfc1738.txt)

    >>> username_password_from_url('https://@host.com/')
    ('', None)
    >>> username_password_from_url('telnet://host.com/')
    (None, None)
    >>> username_password_from_url('ftp://foo:@host.com/')
    ('foo', '')
    >>> username_password_from_url('http://a:b@host.com/')
    ('a', 'b')
    >>> username_password_from_url(1)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url(None)
    Traceback (most recent call last):
      ...
    ValueError: URL has to be a string or unicode object.
    >>> username_password_from_url('http://a@b:c@host.com/')
    Traceback (most recent call last):
      ...
    ValueError: "@" must be encoded for username/password (RFC1738).
    >>> username_password_from_url('ftp://a:b:c@host.com/')
    Traceback (most recent call last):
      ...
    ValueError: ":" must be encoded for username/password (RFC1738).
    >>> username_password_from_url('http://i%2Fo:P%40ss%3A@host.com/')
    ('i/o', 'P@ss:')
    >>> username_password_from_url('ftp://%C3%B6sterreich@host.com/')
    ('\xc3\xb6sterreich', None)
    """
    if type(url) not in (str, unicode):
        raise ValueError('URL has to be a string or unicode object.')

    # Index 1 of the urlparse() result is the network location
    netloc = urlparse.urlparse(url)[1]

    if '@' not in netloc:
        return (None, None)

    # Everything before the last "@" is the authentication part
    credentials = netloc.rsplit('@', 1)[0]

    if ':' not in credentials:
        # Username only, no password given
        return (urllib.unquote(credentials), None)

    (username, password) = credentials.split(':', 1)

    # RFC1738 dictates that we should not allow these unquoted
    # characters in the username and password field (Section 3.1).
    for forbidden in (':', '@', '/'):
        if forbidden in username or forbidden in password:
            raise ValueError('"%c" must be encoded for username/password (RFC1738).' % forbidden)

    return (urllib.unquote(username), urllib.unquote(password))
def directory_is_writable(path):
    """
    Check that "path" is an existing directory which the current
    user may write to (os.W_OK). Returns a boolean.
    """
    if not os.path.isdir(path):
        return False

    return os.access(path, os.W_OK)
def calculate_size(path):
    """
    Tries to calculate the size of a directory, including any
    subdirectories found. The returned value might not be
    correct if the user doesn't have appropriate permissions
    to list all subdirectories of the given path.

    Returns 0 for None, for paths located directly below "/",
    for symlinked directories and for anything that is neither
    a regular file nor a directory. For a regular file, its
    size in bytes is returned.
    """
    if path is None:
        return 0

    # Safety net: never descend into top-level system folders
    if os.path.dirname(path) == '/':
        return 0

    if os.path.isfile(path):
        return os.path.getsize(path)

    if os.path.isdir(path) and not os.path.islink(path):
        total = os.path.getsize(path)

        try:
            entries = os.listdir(path)
        except Exception:
            log('Cannot access: %s', path)
            return total

        for item in entries:
            child = os.path.join(path, item)
            try:
                total += calculate_size(child)
            except Exception:
                # Report the entry that failed, not its parent folder
                log('Cannot get size for %s', child)

        return total

    return 0
def file_modification_datetime(filename):
    """
    Look up the modification time of "filename" and return it
    as a datetime.datetime object. None is returned when the
    filename is None, the file is not readable or the
    timestamp cannot be determined.
    """
    if filename is None or not os.access(filename, os.R_OK):
        return None

    try:
        mtime = os.stat(filename)[stat.ST_MTIME]
        return datetime.datetime.fromtimestamp(mtime)
    except:
        log('Cannot get modification timestamp for %s', filename)
        return None
def file_modification_timestamp(filename):
    """
    Return the modification time of "filename" as a number
    (the ST_MTIME field of os.stat), or -1 if the filename is
    None or the timestamp cannot be determined.
    """
    if filename is not None:
        try:
            return os.stat(filename)[stat.ST_MTIME]
        except:
            log('Cannot get modification timestamp for %s', filename)

    return -1
def file_age_in_days(filename):
    """
    Return how many whole days have passed since "filename" was
    last modified, or zero if the modification date cannot be
    determined.
    """
    modified = file_modification_datetime(filename)

    if modified is not None:
        return (datetime.datetime.now()-modified).days

    return 0
def file_age_to_string(days):
    """
    Turn a "number of days" value into a (translated) string
    for displaying the file age in the UI. Values below one
    day give an empty string.

    >>> file_age_to_string(0)
    ''
    >>> file_age_to_string(1)
    u'1 day ago'
    >>> file_age_to_string(2)
    u'2 days ago'
    """
    if days >= 1:
        return N_('%d day ago', '%d days ago', days) % days

    return ''
def get_free_disk_space_win32(path):
    """
    Win32-specific code to determine the free disk space remaining
    for a given path. Uses code from:

    http://mail.python.org/pipermail/python-list/2003-May/203223.html

    Returns the first value reported by
    win32file.GetDiskFreeSpaceEx() for the drive of "path", or
    zero if the win32file module cannot be imported.
    """
    drive = os.path.splitdrive(path)[0]

    try:
        import win32file
        (user_free, user_total, free_on_disk) = win32file.GetDiskFreeSpaceEx(drive)
        return user_free
    except ImportError:
        log('Warning: Running on Win32 but win32api/win32file not installed.')

    # Cannot determine free disk space
    return 0
def get_free_disk_space(path):
    """
    Determine how much disk space the current user can still
    use on the file system that holds "path".

    Zero is returned when the path does not exist. On Win32 the
    work is delegated to get_free_disk_space_win32(); elsewhere
    the result is f_bavail * f_bsize from os.statvfs().
    """
    if os.path.exists(path):
        if gpodder.win32:
            return get_free_disk_space_win32(path)

        fs_info = os.statvfs(path)
        return fs_info.f_bavail * fs_info.f_bsize

    return 0
def format_date(timestamp):
    """
    Converts a UNIX timestamp to a date representation. This
    function returns "Today", "Yesterday", a weekday name or
    the date in %x format, which (according to the Python docs)
    is the "Locale's appropriate date representation".

    Returns None if the timestamp is None or if there has been
    an error converting the timestamp to a string representation
    (e.g. a non-numeric or out-of-range value).
    """
    if timestamp is None:
        return None

    seconds_in_a_day = 60*60*24

    # Read the clock once, so that "today", "yesterday" and the age
    # in days are all computed relative to the same moment.
    now = time.time()

    try:
        timestamp_date = time.localtime(timestamp)[:3]
        diff = int((now - timestamp)/seconds_in_a_day)
        timestamp_dt = datetime.datetime.fromtimestamp(timestamp)
    except (TypeError, ValueError, OverflowError, OSError):
        log('Warning: Cannot convert "%s" to date.', timestamp, traceback=True)
        return None

    if timestamp_date == time.localtime(now)[:3]:
        return _('Today')

    if timestamp_date == time.localtime(now - seconds_in_a_day)[:3]:
        return _('Yesterday')

    if diff < 7:
        # Weekday name
        return str(timestamp_dt.strftime('%A'))

    # Locale's appropriate date representation
    return str(timestamp_dt.strftime('%x'))
def format_filesize(bytesize, use_si_units=False, digits=2):
    """
    Formats the given size in bytes to be human-readable,

    bytesize     -- the size in bytes (anything float() accepts)
    use_si_units -- use powers of 1000 (kB, MB, GB) instead of
                    powers of 1024 (KiB, MiB, GiB)
    digits       -- number of digits after the decimal point

    Returns a localized "(unknown)" string when the bytesize
    has a negative value or cannot be converted to a number.
    """
    # Both tables are sorted in ascending order
    si_units = (
            ( 'kB', 10**3 ),
            ( 'MB', 10**6 ),
            ( 'GB', 10**9 ),
    )

    binary_units = (
            ( 'KiB', 2**10 ),
            ( 'MiB', 2**20 ),
            ( 'GiB', 2**30 ),
    )

    try:
        bytesize = float(bytesize)
    except (TypeError, ValueError, OverflowError):
        return _('(unknown)')

    if bytesize < 0:
        return _('(unknown)')

    if use_si_units:
        units = si_units
    else:
        units = binary_units

    ( used_unit, used_value ) = ( 'B', bytesize )

    for ( unit, value ) in units:
        if bytesize < value:
            # All remaining units are even bigger
            break
        used_value = bytesize / float(value)
        used_unit = unit

    return ('%.'+str(digits)+'f %s') % (used_value, used_unit)
def delete_file(path):
    """
    Tries to delete the given filename and silently
    ignores deletion errors (if the file doesn't exist).
    Also deletes extracted cover files ("<path>.cover.*")
    if they exist, even when the file itself is already gone.
    """
    log('Trying to delete: %s', path)

    if not path:
        return

    try:
        os.unlink(path)
    except OSError:
        pass

    # Remove any extracted cover art that might exist. The folder is
    # scanned with a plain prefix comparison instead of glob(), because
    # file names may contain glob metacharacters such as "[" or "*".
    (directory, basename) = os.path.split(path)
    cover_prefix = basename + '.cover.'

    try:
        candidates = os.listdir(directory or os.curdir)
    except OSError:
        return

    for candidate in candidates:
        if candidate.startswith(cover_prefix):
            try:
                os.unlink(os.path.join(directory, candidate))
            except OSError:
                pass
def remove_html_tags(html):
    """
    Remove HTML tags from a string and replace numeric and
    named entities with the corresponding character, so the
    HTML text can be displayed in a simple text view.

    Decimal (&#228;) and hexadecimal (&#xe4;) character
    references are decoded; references to code points that
    cannot be represented are left untouched. Well-formed named
    entities that are not known are removed. Text that merely
    contains an ampersand and a later semicolon (e.g.
    "AT&T rocks; yes") is not treated as an entity.
    """
    # If we would want more speed, we could make these global
    re_strip_tags = re.compile('<[^>]*>')
    re_entities = re.compile(r'&(#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6}|[a-zA-Z][a-zA-Z0-9]{1,7});')
    re_charref = re.compile(r'^&#([0-9]+);$')
    re_newline_tags = re.compile('(<br[^>]*>|<[/]?ul[^>]*>|</li>)', re.I)
    re_listing_tags = re.compile('<li[^>]*>', re.I)

    def codepoint_to_char(codepoint, fallback):
        # unichr() rejects values outside of the supported range
        try:
            return unichr(codepoint)
        except (ValueError, OverflowError):
            return fallback

    def replace_entity(match):
        name = match.group(1)

        if name.startswith('#'):
            if name[1] in 'xX':
                codepoint = int(name[2:], 16)
            else:
                codepoint = int(name[1:])
            return codepoint_to_char(codepoint, match.group(0))

        value = entitydefs.get(name)
        if value is None:
            # Unknown named entity: drop it from the output
            return ''

        # The entity table may store a character reference such as
        # "&#913;" instead of the character itself
        charref = re_charref.match(value)
        if charref is not None:
            return codepoint_to_char(int(charref.group(1)), '')

        if isinstance(value, unicode):
            return value

        return value.decode('iso-8859-1')

    result = html

    # Convert common HTML elements to their text equivalent
    result = re_newline_tags.sub('\n', result)
    result = re_listing_tags.sub('\n * ', result)
    result = re.sub('<[Pp]>', '\n\n', result)

    # Remove all HTML/XML tags from the string
    result = re_strip_tags.sub('', result)

    # Convert numeric and named entities to their unicode character
    result = re_entities.sub(replace_entity, result)

    # Convert more than two newlines to two newlines
    result = re.sub('([\r\n]{2})([\r\n])+', '\\1', result)

    return result.strip()
def extension_from_mimetype(mimetype):
    """
    Ask the mimetypes module for a file extension matching the
    given mimetype; an empty string is returned if it has none.
    """
    guessed = mimetypes.guess_extension(mimetype)

    if guessed:
        return guessed

    return ''
def extension_correct_for_mimetype(extension, mimetype):
    """
    Tell whether a filename extension (e.g. ".ogg") is plausible
    for a mimetype (e.g. "application/ogg"). The extension is
    accepted if it equals "." plus the mimetype's subtype or if
    the mimetypes module lists it for that mimetype.

    Raises ValueError if the mimetype has no "/" or the
    extension does not start with a dot.

    >>> extension_correct_for_mimetype('.ogg', 'application/ogg')
    True
    >>> extension_correct_for_mimetype('.ogv', 'video/ogg')
    True
    >>> extension_correct_for_mimetype('.ogg', 'audio/mpeg')
    False
    >>> extension_correct_for_mimetype('mp3', 'audio/mpeg')
    Traceback (most recent call last):
      ...
    ValueError: "mp3" is not an extension (missing .)
    >>> extension_correct_for_mimetype('.mp3', 'audio mpeg')
    Traceback (most recent call last):
      ...
    ValueError: "audio mpeg" is not a mimetype (missing /)
    """
    if '/' not in mimetype:
        raise ValueError('"%s" is not a mimetype (missing /)' % mimetype)
    if not extension.startswith('.'):
        raise ValueError('"%s" is not an extension (missing .)' % extension)

    # Start with a "default" extension derived from the mimetype, e.g.
    # "application/ogg" gives ".ogg", "audio/mpeg" gives ".mpeg", etc...
    candidates = ['.'+mimetype.split('/')[-1]]
    candidates.extend(mimetypes.guess_all_extensions(mimetype))

    return extension in candidates
def filename_from_url(url):
    """
    Extracts the filename and (lowercase) extension (with dot)
    from a URL, e.g. http://server.com/file.MP3?download=yes
    will result in the tuple ("file", ".mp3") being returned.

    This function will also try to best-guess the "real"
    extension for a media file (audio, video) by
    trying to match an extension to these types and recurse
    into the query string to find better matches, if the
    original extension does not resolve to a known type.

    http://my.net/redirect.php?my.net/file.ogg => ("file", ".ogg")
    http://server/get.jsp?file=/episode0815.MOV => ("episode0815", ".mov")
    http://s/redirect.mp4?http://serv2/test.mp4 => ("test", ".mp4")
    """
    (scheme, netloc, path, para, query, fragid) = urlparse.urlparse(url)
    (filename, extension) = os.path.splitext(os.path.basename(urllib.unquote(path)))
    extension = extension.lower()

    if file_type_by_extension(extension) is not None and not \
        query.startswith(scheme+'://'):
        # We have found a valid extension (audio, video)
        # and the query string doesn't look like a URL
        return ( filename, extension )

    # If the query string looks like a possible URL, try that first
    if query.strip() and '/' in query:
        query_url = '://'.join((scheme, urllib.unquote(query)))
        (query_filename, query_extension) = filename_from_url(query_url)

        if file_type_by_extension(query_extension) is not None:
            # Use the parsed result of the recursive call: its extension
            # is lowercased and free of any trailing query string
            return ( query_filename, query_extension )

    # No exact match found, simply return the original filename & extension
    return ( filename, extension )
def file_type_by_extension(extension):
    """
    Classify a filename extension (with leading dot, any case)
    as "audio" or "video"; returns None for empty input and
    for extensions of any other or unknown type.

    Raises ValueError if the extension lacks the leading dot.

    >>> file_type_by_extension('.aif')
    'audio'
    >>> file_type_by_extension('.3GP')
    'video'
    >>> file_type_by_extension('.txt') is None
    True
    >>> file_type_by_extension(None) is None
    True
    >>> file_type_by_extension('ogg')
    Traceback (most recent call last):
      ...
    ValueError: Extension does not start with a dot: ogg
    """
    global _BUILTIN_FILE_TYPES

    if not extension:
        return None

    if not extension.startswith('.'):
        raise ValueError('Extension does not start with a dot: %s' % extension)

    if _BUILTIN_FILE_TYPES is None:
        # Build the table on first use. It lists all types that are
        # not in the default mimetypes.types_map (even if they might
        # be detected by mimetypes.guess_type)
        # For OGG, see http://wiki.xiph.org/MIME_Types_and_File_Extensions
        audio_types = ('.ogg', '.oga', '.spx', '.flac', '.axa',
                       '.aac', '.m4a', '.m4b', '.wma')
        video_types = ('.ogv', '.axv', '.mp4',
                       '.mkv', '.m4v', '.divx', '.flv', '.wmv', '.3gp')
        table = {}
        for known_extension in audio_types:
            table[known_extension] = 'audio'
        for known_extension in video_types:
            table[known_extension] = 'video'
        _BUILTIN_FILE_TYPES = table

    extension = extension.lower()

    if extension in _BUILTIN_FILE_TYPES:
        return _BUILTIN_FILE_TYPES[extension]

    # Need to prepend something to the extension, so guess_type works
    mimetype = mimetypes.guess_type('file'+extension)[0]

    if mimetype is not None and '/' in mimetype:
        major_type = mimetype.split('/', 1)[0]
        if major_type in ('audio', 'video'):
            return major_type

    return None
def get_first_line(s):
    """
    Strip the string, take everything up to the first newline
    and strip that again, so the result carries no whitespace
    before or after.
    """
    first_line = s.strip().split('\n', 1)[0]
    return first_line.strip()
def object_string_formatter(s, **kwargs):
    """
    Expand {OBJECTNAME.ATTRNAME} placeholders in a string. Each
    keyword argument provides one object; placeholders naming
    an existing attribute of that object are replaced with the
    attribute's value. Other placeholders are left as they are.

    Example:

    e = Episode()
    e.title = 'Hello'
    s = '{episode.title} World'

    print object_string_formatter( s, episode = e)
          => 'Hello World'
    """
    result = s

    for ( key, o ) in kwargs.items():
        placeholder_pattern = r'\{%s\.([^\}]+)\}' % key

        for attr in re.findall(placeholder_pattern, s):
            if not hasattr(o, attr):
                continue

            try:
                placeholder = '{%s.%s}' % ( key, attr )
                result = result.replace(placeholder, getattr(o, attr))
            except:
                log( 'Could not replace attribute "%s" in string "%s".', attr, s)

    return result
def format_desktop_command(command, filenames):
    """
    Build argument lists from the "Exec=" command template of a
    .desktop file and a list of filenames.

    The first field code found out of %U, %F, %u, %f is replaced
    by the filename(s); without any field code the filenames are
    appended to the command.

    See http://standards.freedesktop.org/desktop-entry-spec/1.0/ar01s06.html

    Returns a list of commands (each one a list of arguments):
    one per filename for the single-file codes %u and %f, or a
    single command taking all filenames (%U, %F or no code).
    """
    tokens = shlex.split(command)

    (head, tail, takes_many) = (tokens, [], True)

    for fieldcode in ('%U', '%F', '%u', '%f'):
        if fieldcode not in tokens:
            continue

        position = tokens.index(fieldcode)
        head = tokens[:position]
        tail = tokens[position+1:]
        takes_many = fieldcode in ('%U', '%F')
        break

    if takes_many:
        return [head + filenames + tail]

    return [head + [filename] + tail for filename in filenames]
def url_strip_authentication(url):
    """
    Strips authentication data from an URL. Returns the URL with
    the authentication data removed from it.

    Everything up to the last "@" of the network location is
    treated as authentication data, which matches the way
    username_password_from_url() splits it.

    >>> url_strip_authentication('https://host.com/')
    'https://host.com/'
    >>> url_strip_authentication('telnet://foo:bar@host.com/')
    'telnet://host.com/'
    >>> url_strip_authentication('ftp://billy@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('ftp://billy:@example.org')
    'ftp://example.org'
    >>> url_strip_authentication('http://aa:bc@localhost/x')
    'http://localhost/x'
    >>> url_strip_authentication('http://i%2Fo:P%40ss%3A@blubb.lan/u.html')
    'http://blubb.lan/u.html'
    >>> url_strip_authentication('http://c:d@x.org/')
    'http://x.org/'
    >>> url_strip_authentication('http://P%40%3A:i%2F@cx.lan')
    'http://cx.lan'
    >>> url_strip_authentication('http://a@b:c@host.com/')
    'http://host.com/'
    """
    url_parts = list(urlparse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL

    # Remove existing authentication data; the host is whatever follows
    # the LAST "@", even if the userinfo contains an unencoded "@"
    if '@' in url_parts[1]:
        url_parts[1] = url_parts[1].rsplit('@', 1)[1]

    return urlparse.urlunsplit(url_parts)
def url_add_authentication(url, username, password):
    """
    Adds authentication data (username, password) to a given
    URL in order to construct an authenticated URL. Existing
    authentication data in the URL is replaced. If the username
    is None or empty, the URL is returned unchanged.

    Username and password are percent-encoded (including "/",
    "@", ":" and spaces), so that username_password_from_url()
    gives back the original values.

    >>> url_add_authentication('https://host.com/', '', None)
    'https://host.com/'
    >>> url_add_authentication('http://example.org/', None, None)
    'http://example.org/'
    >>> url_add_authentication('telnet://host.com/', 'foo', 'bar')
    'telnet://foo:bar@host.com/'
    >>> url_add_authentication('ftp://example.org', 'billy', None)
    'ftp://billy@example.org'
    >>> url_add_authentication('ftp://example.org', 'billy', '')
    'ftp://billy:@example.org'
    >>> url_add_authentication('http://localhost/x', 'aa', 'bc')
    'http://aa:bc@localhost/x'
    >>> url_add_authentication('http://blubb.lan/u.html', 'i/o', 'P@ss:')
    'http://i%2Fo:P%40ss%3A@blubb.lan/u.html'
    >>> url_add_authentication('http://a:b@x.org/', 'c', 'd')
    'http://c:d@x.org/'
    >>> url_add_authentication('http://i%2F:P%40%3A@cx.lan', 'P@:', 'i/')
    'http://P%40%3A:i%2F@cx.lan'
    >>> url_add_authentication('http://x.org/', 'my user', 'a b')
    'http://my%20user:a%20b@x.org/'
    """
    if username is None or username == '':
        return url

    # quote() with an empty "safe" set instead of quote_plus(): a "+"
    # (quote_plus' encoding of a space) is not turned back into a
    # space by urllib.unquote() when the URL is parsed again
    username = urllib.quote(username, safe='')

    if password is not None:
        password = urllib.quote(password, safe='')
        auth_string = ':'.join((username, password))
    else:
        auth_string = username

    url = url_strip_authentication(url)

    url_parts = list(urlparse.urlsplit(url))
    # url_parts[1] is the HOST part of the URL
    url_parts[1] = '@'.join((auth_string, url_parts[1]))

    return urlparse.urlunsplit(url_parts)
def get_real_url(url):
    """
    Gets the real URL of a file and resolves all redirects.

    Authentication data embedded in the URL is used for HTTP
    Basic authentication. If anything goes wrong, the error is
    logged and the URL is returned as it is at that point (with
    the authentication data removed if stripping it succeeded).
    """
    try:
        username, password = username_password_from_url(url)
        if username or password:
            url = url_strip_authentication(url)
            # Never write the password itself to the log
            log('url=%s, username=%s (password not logged)', url, username)
            password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
            password_mgr.add_password(None, url, username, password)
            handler = urllib2.HTTPBasicAuthHandler(password_mgr)
            opener = urllib2.build_opener(handler)
            return opener.open(url).geturl()
        else:
            return urlopen(url).geturl()
    except Exception:
        log('Error getting real url for %s', url, traceback=True)
        return url
def urlopen(url):
    """
    Open a URL through urllib2, sending gpodder.user_agent as
    the User-agent header. Returns what urllib2.urlopen() returns.
    """
    request = urllib2.Request(url, headers={'User-agent': gpodder.user_agent})
    return urllib2.urlopen(request)
def find_command(command):
    """
    Look through the folders listed in the PATH environment
    variable for a regular file named "command" that the user
    may execute. Returns the full path of the first match, or
    None if there is no match or PATH is not set.
    """
    search_path = os.environ.get('PATH')

    if search_path is None:
        return None

    for directory in search_path.split(os.pathsep):
        candidate = os.path.join(directory, command)
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate

    return None
860 def parse_itunes_xml(url):
862 Parses an XML document in the "url" parameter (this has to be
863 a itms:// or http:// URL to a XML doc) and searches all "<dict>"
864 elements for the first occurence of a "<key>feedURL</key>"
865 element and then continues the search for the string value of
866 this key.
868 This returns the RSS feed URL for Apple iTunes Podcast XML
869 documents that are retrieved by itunes_discover_rss().
871 url = url.replace('itms://', 'http://')
872 doc = http_get_and_gunzip(url)
873 try:
874 d = xml.dom.minidom.parseString(doc)
875 except Exception, e:
876 log('Error parsing document from itms:// URL: %s', e)
877 return None
878 last_key = None
879 for pairs in d.getElementsByTagName('dict'):
880 for node in pairs.childNodes:
881 if node.nodeType != node.ELEMENT_NODE:
882 continue
884 if node.tagName == 'key' and node.childNodes.length > 0:
885 if node.firstChild.nodeType == node.TEXT_NODE:
886 last_key = node.firstChild.data
888 if last_key != 'feedURL':
889 continue
891 if node.tagName == 'string' and node.childNodes.length > 0:
892 if node.firstChild.nodeType == node.TEXT_NODE:
893 return node.firstChild.data
895 return None
def http_get_and_gunzip(uri):
    """
    Does a HTTP GET request and tells the server that we accept
    gzip-encoded data. This is necessary, because the Apple iTunes
    server will always return gzip-encoded data, regardless of what
    we really request.

    Returns the uncompressed document at the given URI. Errors
    raised by urllib2 or by the decompression are passed on to
    the caller.
    """
    request = urllib2.Request(uri)
    request.add_header("Accept-encoding", "gzip")
    usock = urllib2.urlopen(request)
    try:
        data = usock.read()
        content_encoding = usock.headers.get('content-encoding', None)
    finally:
        # Do not leave the connection open until garbage collection
        usock.close()

    if content_encoding == 'gzip':
        gzip_file = gzip.GzipFile(fileobj=StringIO.StringIO(data))
        try:
            data = gzip_file.read()
        finally:
            gzip_file.close()

    return data
def itunes_discover_rss(url):
    """
    Resolve an iTunes-specific podcast URL to a "normal" RSS
    feed URL. URLs that do not contain "phobos.apple.com" (and
    None) are returned unchanged, assuming they already are RSS
    feed URLs. Returns None if the lookup fails.

    Idea from Andrew Clarke's itunes-url-decoder.py
    """
    # None, or not an iTunes URL at all: nothing to do
    if url is None or 'phobos.apple.com' not in url.lower():
        return url

    try:
        data = http_get_and_gunzip(url)
        # Exactly one itmsOpen('...') call is expected in the page
        (url,) = re.findall(r"itmsOpen\('([^']*)", data)
        return parse_itunes_xml(url)
    except:
        return None
def idle_add(func, *args):
    """
    This is a wrapper function that does the Right
    Thing depending on if we are running a GTK+ GUI or
    not. If not, we're simply calling the function.

    If we are a GUI app, we use gobject.idle_add() to
    call the function later - this is needed for
    threads to be able to modify GTK+ widget data.

    The function is called exactly once, no matter what it
    returns.
    """
    if gpodder.ui.desktop or gpodder.ui.maemo:
        import gobject
        def run_once(f, *a):
            f(*a)
            # An idle callback is rescheduled for as long as it returns
            # a true value, so hide the return value of f from gobject
            return False

        gobject.idle_add(run_once, func, *args)
    else:
        func(*args)
def bluetooth_available():
    """
    Report whether files can be sent via bluetooth, i.e.
    whether "bluetooth-sendto" or "gnome-obex-send" can be
    found with find_command(). Returns True or False.
    """
    for tool in ('bluetooth-sendto', 'gnome-obex-send'):
        if find_command(tool):
            return True

    return False
def bluetooth_send_file(filename):
    """
    Send a file via bluetooth, using "bluetooth-sendto" or (if
    that is not available) "gnome-obex-send".

    Waits for the tool to finish and returns True if it exited
    with status zero. Returns False if it failed or if neither
    tool is installed.
    """
    for tool in ('bluetooth-sendto', 'gnome-obex-send'):
        if find_command(tool):
            command_line = [tool, filename]
            return (subprocess.Popen(command_line).wait() == 0)

    log('Cannot send file. Please install "bluetooth-sendto" or "gnome-obex-send".')
    return False
def format_seconds_to_hour_min_sec(seconds):
    """
    Take the number of seconds and format it into a
    human-readable string (duration). Fractions of a
    second are discarded.

    >>> format_seconds_to_hour_min_sec(3834)
    u'1 hour, 3 minutes and 54 seconds'
    >>> format_seconds_to_hour_min_sec(3600)
    u'1 hour'
    >>> format_seconds_to_hour_min_sec(62)
    u'1 minute and 2 seconds'
    """
    # Work on whole seconds only: with a float, "/" would give
    # fractional hours and minutes (and parts like "0 hours")
    seconds = int(seconds)

    if seconds < 1:
        return N_('%d second', '%d seconds', seconds) % seconds

    result = []

    (hours, seconds) = divmod(seconds, 3600)
    (minutes, seconds) = divmod(seconds, 60)

    if hours:
        result.append(N_('%d hour', '%d hours', hours) % hours)

    if minutes:
        result.append(N_('%d minute', '%d minutes', minutes) % minutes)

    if seconds:
        result.append(N_('%d second', '%d seconds', seconds) % seconds)

    if len(result) > 1:
        return (' '+_('and')+' ').join((', '.join(result[:-1]), result[-1]))
    else:
        return result[0]
def http_request(url, method='HEAD'):
    """
    Send a HTTP request (HEAD by default) for the given URL and
    return the httplib response object.

    https:// URLs are requested through a HTTPSConnection, all
    other URLs through a plain HTTPConnection. Authentication
    data in the URL is only removed from the host name, it is
    not sent to the server.
    """
    (scheme, netloc, path, parms, qry, fragid) = urlparse.urlparse(url)

    # The host is what follows the last "@" of the network location
    host = netloc.rsplit('@', 1)[-1]

    if scheme.lower() == 'https':
        conn = httplib.HTTPSConnection(host)
    else:
        conn = httplib.HTTPConnection(host)

    # Request everything after the network location, without the
    # fragment (it is never sent to servers); an empty path means "/"
    start = len(scheme) + len('://') + len(netloc)
    request_path = url[start:].split('#', 1)[0]
    if not request_path.startswith('/'):
        request_path = '/' + request_path

    conn.request(method, request_path)
    return conn.getresponse()
def get_episode_info_from_url(url):
    """
    Ask the HTTP server about a podcast episode by sending a
    request via http_request() and looking at the response
    headers.

    The return value is a dict that contains those of the
    following fields that could be determined:

      "length": The size of the file in bytes
      "pubdate": The unix timestamp for the pubdate

    For URLs that do not start with http:// or https://, an
    empty dict is returned right away.
    """
    if not url.startswith('http://') and not url.startswith('https://'):
        return {}

    response = http_request(url)
    result = {}

    log('Trying to get metainfo for %s', url)

    headers = response.msg

    if 'content-length' in headers:
        try:
            result['length'] = int(headers['content-length'])
        except ValueError:
            log('Error converting content-length header.')

    if 'last-modified' in headers:
        try:
            parsed_date = feedparser._parse_date(headers['last-modified'])
            result['pubdate'] = time.mktime(parsed_date)
        except:
            log('Error converting last-modified header.')

    return result
def gui_open(filename):
    """
    Open a file or folder with the default application set
    by the Desktop environment. This uses "xdg-open" on all
    systems with a few exceptions:

       on Win32, os.startfile() is used
       on Maemo, osso is used to communicate with Nokia Media Player

    Returns True if the open request was issued and False if
    it failed (the failure is logged).
    """
    try:
        if gpodder.ui.maemo:
            try:
                import osso
            except ImportError:
                log('Cannot import osso module on maemo.')
                return False

            log('Using Nokia Media Player to open %s', filename)
            osso_context = osso.Context('gPodder', gpodder.__version__, False)
            filename = filename.encode('utf-8')
            osso_rpc = osso.Rpc(osso_context)
            # The service name doubles as the interface name
            service = 'com.nokia.mediaplayer'
            object_path = '/com/nokia/mediaplayer'
            osso_rpc.rpc_run(service, object_path, service, 'mime_open', (filename,))
        elif gpodder.win32:
            os.startfile(filename)
        else:
            subprocess.Popen(['xdg-open', filename])
        return True
    except:
        log('Cannot open file/folder: "%s"', filename, traceback=True)
        return False
def open_website(url):
    """
    Show the given URL in the system's default web browser.

    On Maemo, the request is sent to the "osso_browser" service
    via osso RPC. Everywhere else, Python's "webbrowser" module
    is used (from a separate thread), so make sure your system
    is set up correctly.
    """
    if not gpodder.ui.maemo:
        # webbrowser.open() is run in its own thread
        browser_thread = threading.Thread(target=webbrowser.open, args=(url,))
        browser_thread.start()
        return

    import osso
    osso_context = osso.Context('gPodder', gpodder.__version__, False)
    osso_rpc = osso.Rpc(osso_context)
    osso_rpc.rpc_run_with_defaults('osso_browser', 'open_new_window', (url,))
def sanitize_encoding(filename):
    r"""
    Return a byte string version of the given string in the
    detected native language encoding, silently dropping all
    characters that cannot be represented in that encoding.

    Byte strings are first decoded using the native encoding
    (ignoring invalid bytes), unicode strings are used as-is.

    >>> sanitize_encoding('\x80')
    ''
    >>> sanitize_encoding(u'unicode')
    'unicode'
    """
    if isinstance(filename, unicode):
        text = filename
    else:
        text = filename.decode(encoding, 'ignore')

    return text.encode(encoding, 'ignore')
def sanitize_filename(filename, max_length=0, use_ascii=False):
    """
    Generate a sanitized version of a filename that can
    be written on disk (i.e. remove/replace invalid
    characters and encode in the native language) and
    trim filename if greater than max_length (0 = no limit).

    If use_ascii is True, don't encode in the native language,
    but use only characters from the ASCII character set.

    The characters / | ? * < > : + [ ] " and \\ are replaced
    with underscores; characters that cannot be encoded in the
    target encoding are dropped.
    """
    if use_ascii:
        target_encoding = 'ascii'
    else:
        target_encoding = encoding

    # Byte strings are always assumed to be in the native encoding,
    # even when the result should be ASCII-only.
    if not isinstance(filename, unicode):
        filename = filename.decode(encoding, 'ignore')

    if max_length > 0 and len(filename) > max_length:
        log('Limiting file/folder name "%s" to %d characters.', filename, max_length)
        filename = filename[:max_length]

    # Raw string: same pattern as before, but without relying on
    # invalid string escape sequences such as "\[" being kept as-is.
    encoded = filename.strip().encode(target_encoding, 'ignore')
    return re.sub(r'[/|?*<>:+\[\]"\\]', '_', encoded)
def find_mount_point(directory):
    """
    Determine the mount point on which a directory resides.

    Starting with the (absolute, normalized) directory itself,
    walk up the directory tree until a mount point is found
    and return it. If no mount point is found on the way up,
    the directory is on the root filesystem and "/" is returned.

    Only str objects are accepted as directory names.

    >>> find_mount_point('/')
    '/'

    >>> find_mount_point(u'/something')
    Traceback (most recent call last):
      ...
    ValueError: Convert unicode objects to str first.

    >>> find_mount_point(None)
    Traceback (most recent call last):
      ...
    ValueError: Directory names should be of type str.

    >>> find_mount_point(42)
    Traceback (most recent call last):
      ...
    ValueError: Directory names should be of type str.

    >>> from minimock import mock, restore
    >>> mocked_mntpoints = ('/', '/home', '/media/usbdisk', '/media/cdrom')
    >>> mock('os.path.ismount', returns_func=lambda x: x in mocked_mntpoints)
    >>>
    >>> # For mocking os.getcwd(), we simply use a lambda to avoid the
    >>> # massive output of "Called os.getcwd()" lines in this doctest
    >>> os.getcwd = lambda: '/home/thp'
    >>>
    >>> find_mount_point('.')
    Called os.path.ismount('/home/thp')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('relativity')
    Called os.path.ismount('/home/thp/relativity')
    Called os.path.ismount('/home/thp')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('/media/usbdisk/')
    Called os.path.ismount('/media/usbdisk')
    '/media/usbdisk'
    >>> find_mount_point('/home/thp/Desktop')
    Called os.path.ismount('/home/thp/Desktop')
    Called os.path.ismount('/home/thp')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('/media/usbdisk/Podcasts/With Spaces')
    Called os.path.ismount('/media/usbdisk/Podcasts/With Spaces')
    Called os.path.ismount('/media/usbdisk/Podcasts')
    Called os.path.ismount('/media/usbdisk')
    '/media/usbdisk'
    >>> find_mount_point('/home/')
    Called os.path.ismount('/home')
    '/home'
    >>> find_mount_point('/media/cdrom/../usbdisk/blubb//')
    Called os.path.ismount('/media/usbdisk/blubb')
    Called os.path.ismount('/media/usbdisk')
    '/media/usbdisk'
    >>> restore()
    """
    if isinstance(directory, unicode):
        # Unicode strings are rejected on purpose: converting them to
        # a native encoding might fail, so the caller has to pick the
        # proper encoding and pass in an already-encoded str.
        raise ValueError('Convert unicode objects to str first.')

    if not isinstance(directory, str):
        raise ValueError('Directory names should be of type str.')

    current = os.path.abspath(directory)

    # Strip one path component per iteration until we either hit a
    # mount point or arrive at the root directory.
    while current != '/' and not os.path.ismount(current):
        current = os.path.dirname(current)

    return current
# Matches any "<scheme>://" prefix, e.g. http://, ftp:// or ldap://
protocolPattern = re.compile(r'^\w+://')

def isabs(string):
    """
    Check if a string is an absolute path or a protocol address.

    Addresses starting with a scheme prefix like http://, ftp://
    or ldap:// are considered to be "absolute" paths.

    @return 1 for protocol addresses, otherwise the result of
            os.path.isabs() for the given string
    Source: http://code.activestate.com/recipes/208993/
    """
    if protocolPattern.match(string) is not None:
        return 1

    return os.path.isabs(string)
def rel2abs(path, base = os.curdir):
    """ Turn a relative path into an absolute path.

    @param path the path to convert - if it is already absolute (or
        a protocol address, see isabs), it is returned unchanged.
    @param base - optional. Defaults to the current directory.
        The base is joined with the given relative path.
    @return the absolute, normalized path of path relative to base
    Source: http://code.activestate.com/recipes/208993/
    """
    if not isabs(path):
        joined = os.path.join(base, path)
        return os.path.abspath(joined)

    return path
def commonpath(l1, l2, common=None):
    """
    Helper function for relpath: split off the common leading
    elements of two sequences of path components.

    @param l1 first list of path components
    @param l2 second list of path components
    @param common optional list of components that is used as the
        start of the returned common prefix (it is not modified)
    @return a tuple (common, rest1, rest2) where common is the list
        of shared leading components (appended to the given common
        list, if any) and rest1/rest2 are the remaining components
        of l1 and l2
    Source: http://code.activestate.com/recipes/208993/
    """
    # Always build a fresh list, so that neither a shared default
    # value nor the list passed in by the caller is handed out.
    if common is None:
        shared = []
    else:
        shared = list(common)

    index = 0
    limit = min(len(l1), len(l2))
    while index < limit and l1[index] == l2[index]:
        shared.append(l1[index])
        index += 1

    return (shared, l1[index:], l2[index:])
def relpath(p1, p2):
    """
    Finds relative path from p1 to p2

    Both paths are split at os.path.sep; for every component of p1
    that is not shared with p2, one ".." step is added, followed by
    the components of p2 that are not shared with p1.

    @return the relative path, or "." if both paths are the same
    Source: http://code.activestate.com/recipes/208993/
    """
    parts1 = p1.split(os.path.sep)
    parts2 = p2.split(os.path.sep)

    (common, rest1, rest2) = commonpath(parts1, parts2)

    result = []
    if rest1:
        # One "../" for each remaining component of p1, kept as a
        # single element (as before) so os.path.join appends to it
        result.append(('..' + os.sep) * len(rest1))
    result.extend(rest2)

    # Compare by value, not by identity ("len(p) is 0")
    if not result:
        return "."

    return os.path.join(*result)
def run_external_command(command_line):
    """
    Run an external command (specified by command_line) in a
    separate thread, so that the caller does not have to wait
    for the command to finish.

    The command line is passed to the shell as-is. The outcome
    (success, command not found, permission denied, other error
    or abnormal termination) is only written to the log; nothing
    is reported back to the caller.

    >>> from minimock import mock, Mock, restore
    >>> mock('subprocess.Popen', returns=Mock('subprocess.Popen'))
    >>> run_external_command('testprogramm')
    Called subprocess.Popen('testprogramm', shell=True)
    Called subprocess.Popen.wait()
    >>> restore()
    """

    def open_process(command_line):
        log('Running external command: %s', command_line)
        # NOTE: shell=True means command_line is interpreted by the
        # shell - callers must only pass trusted command lines here.
        p = subprocess.Popen(command_line, shell=True)
        result = p.wait()
        if result == 0:
            log('Command finished successfully: %s', command_line)
        elif result == 127:
            # Exit status used by the shell for "command not found"
            log('Command not found: %s', command_line)
        elif result == 126:
            # Exit status used by the shell for "cannot execute"
            log('Command permission denied: %s', command_line)
        elif result > 0:
            log('Command returned an error (%d): %s', result, command_line)
        else:
            # Negative return codes mean the process was terminated by
            # a signal - do not report this as a successful run.
            log('Command terminated abnormally (%s): %s', result, command_line)

    threading.Thread(target=open_process, args=(command_line,)).start()