Lib/distutils/util.py

   1 """distutils.util
   2
   3 Miscellaneous utility functions -- anything that doesn't fit into
   4 one of the other *util.py modules.
   5 """
   6
   7 __revision__ = "$Id$"
   8
   9 import sys, os, string, re
  10 from distutils.errors import DistutilsPlatformError
  11 from distutils.dep_util import newer
  12 from distutils.spawn import spawn
  13 from distutils import log
  14
  15 def get_platform ():
  16     """Return a string that identifies the current platform.  This is used
  17     mainly to distinguish platform-specific build directories and
  18     platform-specific built distributions.  Typically includes the OS name
  19     and version and the architecture (as supplied by 'os.uname()'),
  20     although the exact information included depends on the OS; eg. for IRIX
  21     the architecture isn't particularly important (IRIX only runs on SGI
  22     hardware), but for Linux the kernel version isn't particularly
  23     important.
  24
  25     Examples of returned values:
  26        linux-i586
  27        linux-alpha (?)
  28        solaris-2.6-sun4u
  29        irix-5.3
  30        irix64-6.2
  31
  32     For non-POSIX platforms, currently just returns 'sys.platform'.
  33     """
  34     if os.name != "posix" or not hasattr(os, 'uname'):
  35         # XXX what about the architecture? NT is Intel or Alpha,
  36         # Mac OS is M68k or PPC, etc.
  37         return sys.platform
  38
  39     # Try to distinguish various flavours of Unix
  40
  41     (osname, host, release, version, machine) = os.uname()
  42
  43     # Convert the OS name to lowercase, remove '/' characters
  44     # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
  45     osname = string.lower(osname)
  46     osname = string.replace(osname, '/', '')
  47     machine = string.replace(machine, ' ', '_')
  48     machine = string.replace(machine, '/', '-')
  49
  50     if osname[:5] == "linux":
  51         # At least on Linux/Intel, 'machine' is the processor --
  52         # i386, etc.
  53         # XXX what about Alpha, SPARC, etc?
  54         return  "%s-%s" % (osname, machine)
  55     elif osname[:5] == "sunos":
  56         if release[0] >= "5":           # SunOS 5 == Solaris 2
  57             osname = "solaris"
  58             release = "%d.%s" % (int(release[0]) - 3, release[2:])
  59         # fall through to standard osname-release-machine representation
  60     elif osname[:4] == "irix":              # could be "irix64"!
  61         return "%s-%s" % (osname, release)
  62     elif osname[:3] == "aix":
  63         return "%s-%s.%s" % (osname, version, release)
  64     elif osname[:6] == "cygwin":
  65         osname = "cygwin"
  66         rel_re = re.compile (r'[\d.]+')
  67         m = rel_re.match(release)
  68         if m:
  69             release = m.group()
  70     elif osname[:6] == "darwin":
  71         #
  72         # For our purposes, we'll assume that the system version from
  73         # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
  74         # to. This makes the compatibility story a bit more sane because the
  75         # machine is going to compile and link as if it were
  76         # MACOSX_DEPLOYMENT_TARGET.
  77         from distutils.sysconfig import get_config_vars
  78         cfgvars = get_config_vars()
  79
  80         macver = os.environ.get('MACOSX_DEPLOYMENT_TARGET')
  81         if not macver:
  82             macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')
  83
  84         if not macver:
  85             # Get the system version. Reading this plist is a documented
  86             # way to get the system version (see the documentation for
  87             # the Gestalt Manager)
  88             try:
  89                 f = open('/System/Library/CoreServices/SystemVersion.plist')
  90             except IOError:
  91                 # We're on a plain darwin box, fall back to the default
  92                 # behaviour.
  93                 pass
  94             else:
  95                 m = re.search(
  96                         r'<key>ProductUserVisibleVersion</key>\s*' +
  97                         r'<string>(.*?)</string>', f.read())
  98                 f.close()
  99                 if m is not None:
 100                     macver = '.'.join(m.group(1).split('.')[:2])
 101                 # else: fall back to the default behaviour
 102
 103         if macver:
 104             from distutils.sysconfig import get_config_vars
 105             release = macver
 106             osname = "macosx"
 107
 108
 109             if (release + '.') < '10.4.' and \
 110                     get_config_vars().get('UNIVERSALSDK', '').strip():
 111                 # The universal build will build fat binaries, but not on
 112                 # systems before 10.4
 113                 machine = 'fat'
 114
 115             elif machine in ('PowerPC', 'Power_Macintosh'):
 116                 # Pick a sane name for the PPC architecture.
 117                 machine = 'ppc'
 118
 119     return "%s-%s-%s" % (osname, release, machine)
 120
 121 # get_platform ()
 122
 123
 124 def convert_path (pathname):
 125     """Return 'pathname' as a name that will work on the native filesystem,
 126     i.e. split it on '/' and put it back together again using the current
 127     directory separator.  Needed because filenames in the setup script are
 128     always supplied in Unix style, and have to be converted to the local
 129     convention before we can actually use them in the filesystem.  Raises
 130     ValueError on non-Unix-ish systems if 'pathname' either starts or
 131     ends with a slash.
 132     """
 133     if os.sep == '/':
 134         return pathname
 135     if not pathname:
 136         return pathname
 137     if pathname[0] == '/':
 138         raise ValueError, "path '%s' cannot be absolute" % pathname
 139     if pathname[-1] == '/':
 140         raise ValueError, "path '%s' cannot end with '/'" % pathname
 141
 142     paths = string.split(pathname, '/')
 143     while '.' in paths:
 144         paths.remove('.')
 145     if not paths:
 146         return os.curdir
 147     return apply(os.path.join, paths)
 148
 149 # convert_path ()
 150
 151
 152 def change_root (new_root, pathname):
 153     """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
 154     relative, this is equivalent to "os.path.join(new_root,pathname)".
 155     Otherwise, it requires making 'pathname' relative and then joining the
 156     two, which is tricky on DOS/Windows and Mac OS.
 157     """
 158     if os.name == 'posix':
 159         if not os.path.isabs(pathname):
 160             return os.path.join(new_root, pathname)
 161         else:
 162             return os.path.join(new_root, pathname[1:])
 163
 164     elif os.name == 'nt':
 165         (drive, path) = os.path.splitdrive(pathname)
 166         if path[0] == '\\':
 167             path = path[1:]
 168         return os.path.join(new_root, path)
 169
 170     elif os.name == 'os2':
 171         (drive, path) = os.path.splitdrive(pathname)
 172         if path[0] == os.sep:
 173             path = path[1:]
 174         return os.path.join(new_root, path)
 175
 176     elif os.name == 'mac':
 177         if not os.path.isabs(pathname):
 178             return os.path.join(new_root, pathname)
 179         else:
 180             # Chop off volume name from start of path
 181             elements = string.split(pathname, ":", 1)
 182             pathname = ":" + elements[1]
 183             return os.path.join(new_root, pathname)
 184
 185     else:
 186         raise DistutilsPlatformError, \
 187               "nothing known about platform '%s'" % os.name
 188
 189
 190 _environ_checked = 0
 191 def check_environ ():
 192     """Ensure that 'os.environ' has all the environment variables we
 193     guarantee that users can use in config files, command-line options,
 194     etc.  Currently this includes:
 195       HOME - user's home directory (Unix only)
 196       PLAT - description of the current platform, including hardware
 197              and OS (see 'get_platform()')
 198     """
 199     global _environ_checked
 200     if _environ_checked:
 201         return
 202
 203     if os.name == 'posix' and not os.environ.has_key('HOME'):
 204         import pwd
 205         os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
 206
 207     if not os.environ.has_key('PLAT'):
 208         os.environ['PLAT'] = get_platform()
 209
 210     _environ_checked = 1
 211
 212
 213 def subst_vars (s, local_vars):
 214     """Perform shell/Perl-style variable substitution on 'string'.  Every
 215     occurrence of '$' followed by a name is considered a variable, and
 216     variable is substituted by the value found in the 'local_vars'
 217     dictionary, or in 'os.environ' if it's not in 'local_vars'.
 218     'os.environ' is first checked/augmented to guarantee that it contains
 219     certain values: see 'check_environ()'.  Raise ValueError for any
 220     variables not found in either 'local_vars' or 'os.environ'.
 221     """
 222     check_environ()
 223     def _subst (match, local_vars=local_vars):
 224         var_name = match.group(1)
 225         if local_vars.has_key(var_name):
 226             return str(local_vars[var_name])
 227         else:
 228             return os.environ[var_name]
 229
 230     try:
 231         return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
 232     except KeyError, var:
 233         raise ValueError, "invalid variable '$%s'" % var
 234
 235 # subst_vars ()
 236
 237
 238 def grok_environment_error (exc, prefix="error: "):
 239     """Generate a useful error message from an EnvironmentError (IOError or
 240     OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
 241     does what it can to deal with exception objects that don't have a
 242     filename (which happens when the error is due to a two-file operation,
 243     such as 'rename()' or 'link()'.  Returns the error message as a string
 244     prefixed with 'prefix'.
 245     """
 246     # check for Python 1.5.2-style {IO,OS}Error exception objects
 247     if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
 248         if exc.filename:
 249             error = prefix + "%s: %s" % (exc.filename, exc.strerror)
 250         else:
 251             # two-argument functions in posix module don't
 252             # include the filename in the exception object!
 253             error = prefix + "%s" % exc.strerror
 254     else:
 255         error = prefix + str(exc[-1])
 256
 257     return error
 258
 259
 260 # Needed by 'split_quoted()'
 261 _wordchars_re = _squote_re = _dquote_re = None
 262 def _init_regex():
 263     global _wordchars_re, _squote_re, _dquote_re
 264     _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
 265     _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
 266     _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
 267
 268 def split_quoted (s):
 269     """Split a string up according to Unix shell-like rules for quotes and
 270     backslashes.  In short: words are delimited by spaces, as long as those
 271     spaces are not escaped by a backslash, or inside a quoted string.
 272     Single and double quotes are equivalent, and the quote characters can
 273     be backslash-escaped.  The backslash is stripped from any two-character
 274     escape sequence, leaving only the escaped character.  The quote
 275     characters are stripped from any quoted string.  Returns a list of
 276     words.
 277     """
 278
 279     # This is a nice algorithm for splitting up a single string, since it
 280     # doesn't require character-by-character examination.  It was a little
 281     # bit of a brain-bender to get it working right, though...
 282     if _wordchars_re is None: _init_regex()
 283
 284     s = string.strip(s)
 285     words = []
 286     pos = 0
 287
 288     while s:
 289         m = _wordchars_re.match(s, pos)
 290         end = m.end()
 291         if end == len(s):
 292             words.append(s[:end])
 293             break
 294
 295         if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
 296             words.append(s[:end])       # we definitely have a word delimiter
 297             s = string.lstrip(s[end:])
 298             pos = 0
 299
 300         elif s[end] == '\\':            # preserve whatever is being escaped;
 301                                         # will become part of the current word
 302             s = s[:end] + s[end+1:]
 303             pos = end+1
 304
 305         else:
 306             if s[end] == "'":           # slurp singly-quoted string
 307                 m = _squote_re.match(s, end)
 308             elif s[end] == '"':         # slurp doubly-quoted string
 309                 m = _dquote_re.match(s, end)
 310             else:
 311                 raise RuntimeError, \
 312                       "this can't happen (bad char '%c')" % s[end]
 313
 314             if m is None:
 315                 raise ValueError, \
 316                       "bad string (mismatched %s quotes?)" % s[end]
 317
 318             (beg, end) = m.span()
 319             s = s[:beg] + s[beg+1:end-1] + s[end:]
 320             pos = m.end() - 2
 321
 322         if pos >= len(s):
 323             words.append(s)
 324             break
 325
 326     return words
 327
 328 # split_quoted ()
 329
 330
 331 def execute (func, args, msg=None, verbose=0, dry_run=0):
 332     """Perform some action that affects the outside world (eg.  by
 333     writing to the filesystem).  Such actions are special because they
 334     are disabled by the 'dry_run' flag.  This method takes care of all
 335     that bureaucracy for you; all you have to do is supply the
 336     function to call and an argument tuple for it (to embody the
 337     "external action" being performed), and an optional message to
 338     print.
 339     """
 340     if msg is None:
 341         msg = "%s%r" % (func.__name__, args)
 342         if msg[-2:] == ',)':        # correct for singleton tuple
 343             msg = msg[0:-2] + ')'
 344
 345     log.info(msg)
 346     if not dry_run:
 347         apply(func, args)
 348
 349
 350 def strtobool (val):
 351     """Convert a string representation of truth to true (1) or false (0).
 352
 353     True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
 354     are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
 355     'val' is anything else.
 356     """
 357     val = string.lower(val)
 358     if val in ('y', 'yes', 't', 'true', 'on', '1'):
 359         return 1
 360     elif val in ('n', 'no', 'f', 'false', 'off', '0'):
 361         return 0
 362     else:
 363         raise ValueError, "invalid truth value %r" % (val,)
 364
 365
 366 def byte_compile (py_files,
 367                   optimize=0, force=0,
 368                   prefix=None, base_dir=None,
 369                   verbose=1, dry_run=0,
 370                   direct=None):
 371     """Byte-compile a collection of Python source files to either .pyc
 372     or .pyo files in the same directory.  'py_files' is a list of files
 373     to compile; any files that don't end in ".py" are silently skipped.
 374     'optimize' must be one of the following:
 375       0 - don't optimize (generate .pyc)
 376       1 - normal optimization (like "python -O")
 377       2 - extra optimization (like "python -OO")
 378     If 'force' is true, all files are recompiled regardless of
 379     timestamps.
 380
 381     The source filename encoded in each bytecode file defaults to the
 382     filenames listed in 'py_files'; you can modify these with 'prefix' and
 383     'basedir'.  'prefix' is a string that will be stripped off of each
 384     source filename, and 'base_dir' is a directory name that will be
 385     prepended (after 'prefix' is stripped).  You can supply either or both
 386     (or neither) of 'prefix' and 'base_dir', as you wish.
 387
 388     If 'dry_run' is true, doesn't actually do anything that would
 389     affect the filesystem.
 390
 391     Byte-compilation is either done directly in this interpreter process
 392     with the standard py_compile module, or indirectly by writing a
 393     temporary script and executing it.  Normally, you should let
 394     'byte_compile()' figure out to use direct compilation or not (see
 395     the source for details).  The 'direct' flag is used by the script
 396     generated in indirect mode; unless you know what you're doing, leave
 397     it set to None.
 398     """
 399
 400     # First, if the caller didn't force us into direct or indirect mode,
 401     # figure out which mode we should be in.  We take a conservative
 402     # approach: choose direct mode *only* if the current interpreter is
 403     # in debug mode and optimize is 0.  If we're not in debug mode (-O
 404     # or -OO), we don't know which level of optimization this
 405     # interpreter is running with, so we can't do direct
 406     # byte-compilation and be certain that it's the right thing.  Thus,
 407     # always compile indirectly if the current interpreter is in either
 408     # optimize mode, or if either optimization level was requested by
 409     # the caller.
 410     if direct is None:
 411         direct = (__debug__ and optimize == 0)
 412
 413     # "Indirect" byte-compilation: write a temporary script and then
 414     # run it with the appropriate flags.
 415     if not direct:
 416         try:
 417             from tempfile import mkstemp
 418             (script_fd, script_name) = mkstemp(".py")
 419         except ImportError:
 420             from tempfile import mktemp
 421             (script_fd, script_name) = None, mktemp(".py")
 422         log.info("writing byte-compilation script '%s'", script_name)
 423         if not dry_run:
 424             if script_fd is not None:
 425                 script = os.fdopen(script_fd, "w")
 426             else:
 427                 script = open(script_name, "w")
 428
 429             script.write("""\
 430 from distutils.util import byte_compile
 431 files = [
 432 """)
 433
 434             # XXX would be nice to write absolute filenames, just for
 435             # safety's sake (script should be more robust in the face of
 436             # chdir'ing before running it).  But this requires abspath'ing
 437             # 'prefix' as well, and that breaks the hack in build_lib's
 438             # 'byte_compile()' method that carefully tacks on a trailing
 439             # slash (os.sep really) to make sure the prefix here is "just
 440             # right".  This whole prefix business is rather delicate -- the
 441             # problem is that it's really a directory, but I'm treating it
 442             # as a dumb string, so trailing slashes and so forth matter.
 443
 444             #py_files = map(os.path.abspath, py_files)
 445             #if prefix:
 446             #    prefix = os.path.abspath(prefix)
 447
 448             script.write(string.join(map(repr, py_files), ",\n") + "]\n")
 449             script.write("""
 450 byte_compile(files, optimize=%r, force=%r,
 451              prefix=%r, base_dir=%r,
 452              verbose=%r, dry_run=0,
 453              direct=1)
 454 """ % (optimize, force, prefix, base_dir, verbose))
 455
 456             script.close()
 457
 458         cmd = [sys.executable, script_name]
 459         if optimize == 1:
 460             cmd.insert(1, "-O")
 461         elif optimize == 2:
 462             cmd.insert(1, "-OO")
 463         spawn(cmd, dry_run=dry_run)
 464         execute(os.remove, (script_name,), "removing %s" % script_name,
 465                 dry_run=dry_run)
 466
 467     # "Direct" byte-compilation: use the py_compile module to compile
 468     # right here, right now.  Note that the script generated in indirect
 469     # mode simply calls 'byte_compile()' in direct mode, a weird sort of
 470     # cross-process recursion.  Hey, it works!
 471     else:
 472         from py_compile import compile
 473
 474         for file in py_files:
 475             if file[-3:] != ".py":
 476                 # This lets us be lazy and not filter filenames in
 477                 # the "install_lib" command.
 478                 continue
 479
 480             # Terminology from the py_compile module:
 481             #   cfile - byte-compiled file
 482             #   dfile - purported source filename (same as 'file' by default)
 483             cfile = file + (__debug__ and "c" or "o")
 484             dfile = file
 485             if prefix:
 486                 if file[:len(prefix)] != prefix:
 487                     raise ValueError, \
 488                           ("invalid prefix: filename %r doesn't start with %r"
 489                            % (file, prefix))
 490                 dfile = dfile[len(prefix):]
 491             if base_dir:
 492                 dfile = os.path.join(base_dir, dfile)
 493
 494             cfile_base = os.path.basename(cfile)
 495             if direct:
 496                 if force or newer(file, cfile):
 497                     log.info("byte-compiling %s to %s", file, cfile_base)
 498                     if not dry_run:
 499                         compile(file, cfile, dfile)
 500                 else:
 501                     log.debug("skipping byte-compilation of %s to %s",
 502                               file, cfile_base)
 503
 504 # byte_compile ()
 505
 506 def rfc822_escape (header):
 507     """Return a version of the string escaped for inclusion in an
 508     RFC-822 header, by ensuring there are 8 spaces space after each newline.
 509     """
 510     lines = string.split(header, '\n')
 511     lines = map(string.strip, lines)
 512     header = string.join(lines, '\n' + 8*' ')
 513     return header