Lib/distutils/util.py

   1 """distutils.util
   2
   3 Miscellaneous utility functions -- anything that doesn't fit into
   4 one of the other *util.py modules.
   5 """
   6
   7 __revision__ = "$Id$"
   8
   9 import sys, os, string, re
  10
  11 from distutils.errors import DistutilsPlatformError
  12 from distutils.dep_util import newer
  13 from distutils.spawn import spawn, find_executable
  14 from distutils import log
  15 from distutils.version import LooseVersion
  16 from distutils.errors import DistutilsByteCompileError
  17
  18 def get_platform():
  19     """Return a string that identifies the current platform.
  20
  21     This is used mainly to distinguish platform-specific build directories and
  22     platform-specific built distributions.  Typically includes the OS name
  23     and version and the architecture (as supplied by 'os.uname()'),
  24     although the exact information included depends on the OS; eg. for IRIX
  25     the architecture isn't particularly important (IRIX only runs on SGI
  26     hardware), but for Linux the kernel version isn't particularly
  27     important.
  28
  29     Examples of returned values:
  30        linux-i586
  31        linux-alpha (?)
  32        solaris-2.6-sun4u
  33        irix-5.3
  34        irix64-6.2
  35
  36     Windows will return one of:
  37        win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
  38        win-ia64 (64bit Windows on Itanium)
  39        win32 (all others - specifically, sys.platform is returned)
  40
  41     For other non-POSIX platforms, currently just returns 'sys.platform'.
  42     """
  43     if os.name == 'nt':
  44         # sniff sys.version for architecture.
  45         prefix = " bit ("
  46         i = sys.version.find(prefix)
  47         if i == -1:
  48             return sys.platform
  49         j = sys.version.find(")", i)
  50         look = sys.version[i+len(prefix):j].lower()
  51         if look == 'amd64':
  52             return 'win-amd64'
  53         if look == 'itanium':
  54             return 'win-ia64'
  55         return sys.platform
  56
  57     if os.name != "posix" or not hasattr(os, 'uname'):
  58         # XXX what about the architecture? NT is Intel or Alpha,
  59         # Mac OS is M68k or PPC, etc.
  60         return sys.platform
  61
  62     # Try to distinguish various flavours of Unix
  63
  64     (osname, host, release, version, machine) = os.uname()
  65
  66     # Convert the OS name to lowercase, remove '/' characters
  67     # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
  68     osname = osname.lower().replace('/', '')
  69     machine = machine.replace(' ', '_')
  70     machine = machine.replace('/', '-')
  71
  72     if osname[:5] == "linux":
  73         # At least on Linux/Intel, 'machine' is the processor --
  74         # i386, etc.
  75         # XXX what about Alpha, SPARC, etc?
  76         return  "%s-%s" % (osname, machine)
  77     elif osname[:5] == "sunos":
  78         if release[0] >= "5":           # SunOS 5 == Solaris 2
  79             osname = "solaris"
  80             release = "%d.%s" % (int(release[0]) - 3, release[2:])
  81         # fall through to standard osname-release-machine representation
  82     elif osname[:4] == "irix":              # could be "irix64"!
  83         return "%s-%s" % (osname, release)
  84     elif osname[:3] == "aix":
  85         return "%s-%s.%s" % (osname, version, release)
  86     elif osname[:6] == "cygwin":
  87         osname = "cygwin"
  88         rel_re = re.compile (r'[\d.]+')
  89         m = rel_re.match(release)
  90         if m:
  91             release = m.group()
  92     elif osname[:6] == "darwin":
  93         #
  94         # For our purposes, we'll assume that the system version from
  95         # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
  96         # to. This makes the compatibility story a bit more sane because the
  97         # machine is going to compile and link as if it were
  98         # MACOSX_DEPLOYMENT_TARGET.
  99         from distutils.sysconfig import get_config_vars
 100         cfgvars = get_config_vars()
 101
 102         macver = os.environ.get('MACOSX_DEPLOYMENT_TARGET')
 103         if not macver:
 104             macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')
 105
 106         if 1:
 107             # Always calculate the release of the running machine,
 108             # needed to determine if we can build fat binaries or not.
 109
 110             macrelease = macver
 111             # Get the system version. Reading this plist is a documented
 112             # way to get the system version (see the documentation for
 113             # the Gestalt Manager)
 114             try:
 115                 f = open('/System/Library/CoreServices/SystemVersion.plist')
 116             except IOError:
 117                 # We're on a plain darwin box, fall back to the default
 118                 # behaviour.
 119                 pass
 120             else:
 121                 m = re.search(
 122                         r'<key>ProductUserVisibleVersion</key>\s*' +
 123                         r'<string>(.*?)</string>', f.read())
 124                 f.close()
 125                 if m is not None:
 126                     macrelease = '.'.join(m.group(1).split('.')[:2])
 127                 # else: fall back to the default behaviour
 128
 129         if not macver:
 130             macver = macrelease
 131
 132         if macver:
 133             from distutils.sysconfig import get_config_vars
 134             release = macver
 135             osname = "macosx"
 136
 137             if (macrelease + '.') >= '10.4.' and \
 138                     '-arch' in get_config_vars().get('CFLAGS', '').strip():
 139                 # The universal build will build fat binaries, but not on
 140                 # systems before 10.4
 141                 #
 142                 # Try to detect 4-way universal builds, those have machine-type
 143                 # 'universal' instead of 'fat'.
 144
 145                 machine = 'fat'
 146                 cflags = get_config_vars().get('CFLAGS')
 147
 148                 archs = re.findall('-arch\s+(\S+)', cflags)
 149                 archs.sort()
 150                 archs = tuple(archs)
 151
 152                 if len(archs) == 1:
 153                     machine = archs[0]
 154                 elif archs == ('i386', 'ppc'):
 155                     machine = 'fat'
 156                 elif archs == ('i386', 'x86_64'):
 157                     machine = 'intel'
 158                 elif archs == ('i386', 'ppc', 'x86_64'):
 159                     machine = 'fat3'
 160                 elif archs == ('ppc64', 'x86_64'):
 161                     machine = 'fat64'
 162                 elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
 163                     machine = 'universal'
 164                 else:
 165                     raise ValueError(
 166                        "Don't know machine value for archs=%r"%(archs,))
 167
 168             elif machine == 'i386':
 169                 # On OSX the machine type returned by uname is always the
 170                 # 32-bit variant, even if the executable architecture is
 171                 # the 64-bit variant
 172                 if sys.maxint >= 2**32:
 173                     machine = 'x86_64'
 174
 175             elif machine in ('PowerPC', 'Power_Macintosh'):
 176                 # Pick a sane name for the PPC architecture.
 177                 machine = 'ppc'
 178
 179                 # See 'i386' case
 180                 if sys.maxint >= 2**32:
 181                     machine = 'ppc64'
 182
 183     return "%s-%s-%s" % (osname, release, machine)
 184
 185
 186 def convert_path(pathname):
 187     """Return 'pathname' as a name that will work on the native filesystem.
 188
 189     i.e. split it on '/' and put it back together again using the current
 190     directory separator.  Needed because filenames in the setup script are
 191     always supplied in Unix style, and have to be converted to the local
 192     convention before we can actually use them in the filesystem.  Raises
 193     ValueError on non-Unix-ish systems if 'pathname' either starts or
 194     ends with a slash.
 195     """
 196     if os.sep == '/':
 197         return pathname
 198     if not pathname:
 199         return pathname
 200     if pathname[0] == '/':
 201         raise ValueError("path '%s' cannot be absolute" % pathname)
 202     if pathname[-1] == '/':
 203         raise ValueError("path '%s' cannot end with '/'" % pathname)
 204
 205     paths = pathname.split('/')
 206     while '.' in paths:
 207         paths.remove('.')
 208     if not paths:
 209         return os.curdir
 210     return os.path.join(*paths)
 211
 212
 213 def change_root(new_root, pathname):
 214     """Return 'pathname' with 'new_root' prepended.
 215
 216     If 'pathname' is relative, this is equivalent to
 217     "os.path.join(new_root,pathname)".
 218     Otherwise, it requires making 'pathname' relative and then joining the
 219     two, which is tricky on DOS/Windows and Mac OS.
 220     """
 221     if os.name == 'posix':
 222         if not os.path.isabs(pathname):
 223             return os.path.join(new_root, pathname)
 224         else:
 225             return os.path.join(new_root, pathname[1:])
 226
 227     elif os.name == 'nt':
 228         (drive, path) = os.path.splitdrive(pathname)
 229         if path[0] == '\\':
 230             path = path[1:]
 231         return os.path.join(new_root, path)
 232
 233     elif os.name == 'os2':
 234         (drive, path) = os.path.splitdrive(pathname)
 235         if path[0] == os.sep:
 236             path = path[1:]
 237         return os.path.join(new_root, path)
 238
 239     elif os.name == 'mac':
 240         if not os.path.isabs(pathname):
 241             return os.path.join(new_root, pathname)
 242         else:
 243             # Chop off volume name from start of path
 244             elements = pathname.split(":", 1)
 245             pathname = ":" + elements[1]
 246             return os.path.join(new_root, pathname)
 247
 248     else:
 249         raise DistutilsPlatformError("nothing known about "
 250                                      "platform '%s'" % os.name)
 251
 252 _environ_checked = 0
 253
 254 def check_environ():
 255     """Ensure that 'os.environ' has all the environment variables needed.
 256
 257     We guarantee that users can use in config files, command-line options,
 258     etc.  Currently this includes:
 259       HOME - user's home directory (Unix only)
 260       PLAT - description of the current platform, including hardware
 261              and OS (see 'get_platform()')
 262     """
 263     global _environ_checked
 264     if _environ_checked:
 265         return
 266
 267     if os.name == 'posix' and 'HOME' not in os.environ:
 268         import pwd
 269         os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
 270
 271     if 'PLAT' not in os.environ:
 272         os.environ['PLAT'] = get_platform()
 273
 274     _environ_checked = 1
 275
 276 def subst_vars(s, local_vars):
 277     """Perform shell/Perl-style variable substitution on 'string'.
 278
 279     Every occurrence of '$' followed by a name is considered a variable, and
 280     variable is substituted by the value found in the 'local_vars'
 281     dictionary, or in 'os.environ' if it's not in 'local_vars'.
 282     'os.environ' is first checked/augmented to guarantee that it contains
 283     certain values: see 'check_environ()'.  Raise ValueError for any
 284     variables not found in either 'local_vars' or 'os.environ'.
 285     """
 286     check_environ()
 287     def _subst (match, local_vars=local_vars):
 288         var_name = match.group(1)
 289         if var_name in local_vars:
 290             return str(local_vars[var_name])
 291         else:
 292             return os.environ[var_name]
 293
 294     try:
 295         return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
 296     except KeyError, var:
 297         raise ValueError("invalid variable '$%s'" % var)
 298
 299 def grok_environment_error(exc, prefix="error: "):
 300     """Generate a useful error message from an EnvironmentError.
 301
 302     This will generate an IOError or an OSError exception object.
 303     Handles Python 1.5.1 and 1.5.2 styles, and
 304     does what it can to deal with exception objects that don't have a
 305     filename (which happens when the error is due to a two-file operation,
 306     such as 'rename()' or 'link()'.  Returns the error message as a string
 307     prefixed with 'prefix'.
 308     """
 309     # check for Python 1.5.2-style {IO,OS}Error exception objects
 310     if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
 311         if exc.filename:
 312             error = prefix + "%s: %s" % (exc.filename, exc.strerror)
 313         else:
 314             # two-argument functions in posix module don't
 315             # include the filename in the exception object!
 316             error = prefix + "%s" % exc.strerror
 317     else:
 318         error = prefix + str(exc[-1])
 319
 320     return error
 321
 322 # Needed by 'split_quoted()'
 323 _wordchars_re = _squote_re = _dquote_re = None
 324
 325 def _init_regex():
 326     global _wordchars_re, _squote_re, _dquote_re
 327     _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
 328     _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
 329     _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
 330
 331 def split_quoted(s):
 332     """Split a string up according to Unix shell-like rules for quotes and
 333     backslashes.
 334
 335     In short: words are delimited by spaces, as long as those
 336     spaces are not escaped by a backslash, or inside a quoted string.
 337     Single and double quotes are equivalent, and the quote characters can
 338     be backslash-escaped.  The backslash is stripped from any two-character
 339     escape sequence, leaving only the escaped character.  The quote
 340     characters are stripped from any quoted string.  Returns a list of
 341     words.
 342     """
 343     # This is a nice algorithm for splitting up a single string, since it
 344     # doesn't require character-by-character examination.  It was a little
 345     # bit of a brain-bender to get it working right, though...
 346     if _wordchars_re is None: _init_regex()
 347
 348     s = s.strip()
 349     words = []
 350     pos = 0
 351
 352     while s:
 353         m = _wordchars_re.match(s, pos)
 354         end = m.end()
 355         if end == len(s):
 356             words.append(s[:end])
 357             break
 358
 359         if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
 360             words.append(s[:end])       # we definitely have a word delimiter
 361             s = s[end:].lstrip()
 362             pos = 0
 363
 364         elif s[end] == '\\':            # preserve whatever is being escaped;
 365                                         # will become part of the current word
 366             s = s[:end] + s[end+1:]
 367             pos = end+1
 368
 369         else:
 370             if s[end] == "'":           # slurp singly-quoted string
 371                 m = _squote_re.match(s, end)
 372             elif s[end] == '"':         # slurp doubly-quoted string
 373                 m = _dquote_re.match(s, end)
 374             else:
 375                 raise RuntimeError("this can't happen "
 376                                    "(bad char '%c')" % s[end])
 377
 378             if m is None:
 379                 raise ValueError("bad string (mismatched %s quotes?)" % s[end])
 380
 381             (beg, end) = m.span()
 382             s = s[:beg] + s[beg+1:end-1] + s[end:]
 383             pos = m.end() - 2
 384
 385         if pos >= len(s):
 386             words.append(s)
 387             break
 388
 389     return words
 390
 391
 392 def execute(func, args, msg=None, verbose=0, dry_run=0):
 393     """Perform some action that affects the outside world.
 394
 395     eg. by writing to the filesystem).  Such actions are special because
 396     they are disabled by the 'dry_run' flag.  This method takes care of all
 397     that bureaucracy for you; all you have to do is supply the
 398     function to call and an argument tuple for it (to embody the
 399     "external action" being performed), and an optional message to
 400     print.
 401     """
 402     if msg is None:
 403         msg = "%s%r" % (func.__name__, args)
 404         if msg[-2:] == ',)':        # correct for singleton tuple
 405             msg = msg[0:-2] + ')'
 406
 407     log.info(msg)
 408     if not dry_run:
 409         func(*args)
 410
 411
 412 def strtobool(val):
 413     """Convert a string representation of truth to true (1) or false (0).
 414
 415     True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
 416     are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
 417     'val' is anything else.
 418     """
 419     val = val.lower()
 420     if val in ('y', 'yes', 't', 'true', 'on', '1'):
 421         return 1
 422     elif val in ('n', 'no', 'f', 'false', 'off', '0'):
 423         return 0
 424     else:
 425         raise ValueError, "invalid truth value %r" % (val,)
 426
 427
def byte_compile(py_files, optimize=0, force=0, prefix=None, base_dir=None,
                 verbose=1, dry_run=0, direct=None):
    """Byte-compile a collection of Python source files to either .pyc
    or .pyo files in the same directory.

    'py_files' is a list of files to compile; any files that don't end in
    ".py" are silently skipped. 'optimize' must be one of the following:
      0 - don't optimize (generate .pyc)
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually compile anything or run the
    helper script.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out whether to use direct compilation or not
    (see the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is set,
    and ValueError if 'prefix' is given and a filename does not start
    with it.
    """
    # Refuse to byte-compile at all when sys.dont_write_bytecode is set.
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        # Prefer mkstemp(); fall back to mktemp() (no open descriptor)
        # when mkstemp cannot be imported.
        try:
            from tempfile import mkstemp
            (script_fd, script_name) = mkstemp(".py")
        except ImportError:
            from tempfile import mktemp
            (script_fd, script_name) = None, mktemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        # NOTE(review): mkstemp() runs even when 'dry_run' is true, while
        # the fdopen()/close() and the os.remove() below are skipped in
        # that case -- presumably this leaves the temporary file and its
        # descriptor behind on a dry run; confirm.
        if not dry_run:
            if script_fd is not None:
                script = os.fdopen(script_fd, "w")
            else:
                script = open(script_name, "w")

            script.write("""\
from distutils.util import byte_compile
files = [
""")

            # XXX would be nice to write absolute filenames, just for
            # safety's sake (script should be more robust in the face of
            # chdir'ing before running it).  But this requires abspath'ing
            # 'prefix' as well, and that breaks the hack in build_lib's
            # 'byte_compile()' method that carefully tacks on a trailing
            # slash (os.sep really) to make sure the prefix here is "just
            # right".  This whole prefix business is rather delicate -- the
            # problem is that it's really a directory, but I'm treating it
            # as a dumb string, so trailing slashes and so forth matter.

            #py_files = map(os.path.abspath, py_files)
            #if prefix:
            #    prefix = os.path.abspath(prefix)

            # Emit the filenames as Python literals, then the call that
            # re-enters byte_compile() in direct mode with the same options.
            script.write(",\n".join(map(repr, py_files)) + "]\n")
            script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))

            script.close()

        # Run the script with this interpreter, inserting -O / -OO to
        # match the requested optimization level.
        cmd = [sys.executable, script_name]
        if optimize == 1:
            cmd.insert(1, "-O")
        elif optimize == 2:
            cmd.insert(1, "-OO")
        spawn(cmd, dry_run=dry_run)
        # NOTE(review): this removal is not in a 'finally', so it is
        # skipped if spawn() raises.
        execute(os.remove, (script_name,), "removing %s" % script_name,
                dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for file in py_files:
            if file[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'file' by default)
            # The suffix is "c" when __debug__ is true and "o" otherwise.
            cfile = file + (__debug__ and "c" or "o")
            dfile = file
            if prefix:
                if file[:len(prefix)] != prefix:
                    raise ValueError("invalid prefix: filename %r doesn't "
                                     "start with %r" % (file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            # NOTE(review): 'direct' is always true in this branch, so
            # this test is redundant.
            if direct:
                # Recompile only when forced or when newer() reports the
                # source as newer than the bytecode file.
                if force or newer(file, cfile):
                    log.info("byte-compiling %s to %s", file, cfile_base)
                    if not dry_run:
                        compile(file, cfile, dfile)
                else:
                    log.debug("skipping byte-compilation of %s to %s",
                              file, cfile_base)
 565
 566
 567 def rfc822_escape(header):
 568     """Return a version of the string escaped for inclusion in an
 569     RFC-822 header, by ensuring there are 8 spaces space after each newline.
 570     """
 571     lines = header.split('\n')
 572     sep = '\n' + 8 * ' '
 573     return sep.join(lines)
 574
 575 _RE_VERSION = re.compile('(\d+\.\d+(\.\d+)*)')
 576 _MAC_OS_X_LD_VERSION = re.compile('^@\(#\)PROGRAM:ld  PROJECT:ld64-((\d+)(\.\d+)*)')
 577
 578 def _find_ld_version():
 579     """Finds the ld version. The version scheme differs under Mac OSX."""
 580     if sys.platform == 'darwin':
 581         return _find_exe_version('ld -v', _MAC_OS_X_LD_VERSION)
 582     else:
 583         return _find_exe_version('ld -v')
 584
 585 def _find_exe_version(cmd, pattern=_RE_VERSION):
 586     """Find the version of an executable by running `cmd` in the shell.
 587
 588     `pattern` is a compiled regular expression. If not provided, default
 589     to _RE_VERSION. If the command is not found, or the output does not
 590     match the mattern, returns None.
 591     """
 592     from subprocess import Popen, PIPE
 593     executable = cmd.split()[0]
 594     if find_executable(executable) is None:
 595         return None
 596     pipe = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
 597     try:
 598         stdout, stderr = pipe.stdout.read(), pipe.stderr.read()
 599     finally:
 600         pipe.stdout.close()
 601         pipe.stderr.close()
 602     # some commands like ld under MacOS X, will give the
 603     # output in the stderr, rather than stdout.
 604     if stdout != '':
 605         out_string = stdout
 606     else:
 607         out_string = stderr
 608
 609     result = pattern.search(out_string)
 610     if result is None:
 611         return None
 612     return LooseVersion(result.group(1))
 613
 614 def get_compiler_versions():
 615     """Returns a tuple providing the versions of gcc, ld and dllwrap
 616
 617     For each command, if a command is not found, None is returned.
 618     Otherwise a LooseVersion instance is returned.
 619     """
 620     gcc = _find_exe_version('gcc -dumpversion')
 621     ld = _find_ld_version()
 622     dllwrap = _find_exe_version('dllwrap --version')
 623     return gcc, ld, dllwrap