Lib/distutils/util.py

   1 """distutils.util
   2
   3 Miscellaneous utility functions -- anything that doesn't fit into
   4 one of the other *util.py modules.
   5 """
   6
   7 __revision__ = "$Id$"
   8
   9 import sys, os, string, re
  10 from distutils.errors import DistutilsPlatformError
  11 from distutils.dep_util import newer
  12 from distutils.spawn import spawn
  13 from distutils import log
  14
  15 def get_platform ():
  16     """Return a string that identifies the current platform.  This is used
  17     mainly to distinguish platform-specific build directories and
  18     platform-specific built distributions.  Typically includes the OS name
  19     and version and the architecture (as supplied by 'os.uname()'),
  20     although the exact information included depends on the OS; eg. for IRIX
  21     the architecture isn't particularly important (IRIX only runs on SGI
  22     hardware), but for Linux the kernel version isn't particularly
  23     important.
  24
  25     Examples of returned values:
  26        linux-i586
  27        linux-alpha (?)
  28        solaris-2.6-sun4u
  29        irix-5.3
  30        irix64-6.2
  31
  32     Windows will return one of:
  33        win-x86_64 (64bit Windows on x86_64 (AMD64))
  34        win-ia64 (64bit Windows on Itanium)
  35        win32 (all others - specifically, sys.platform is returned)
  36
  37     For other non-POSIX platforms, currently just returns 'sys.platform'.
  38     """
  39     if os.name == 'nt':
  40         # sniff sys.version for architecture.
  41         prefix = " bit ("
  42         i = string.find(sys.version, prefix)
  43         if i == -1:
  44             return sys.platform
  45         j = string.find(sys.version, ")", i)
  46         look = sys.version[i+len(prefix):j].lower()
  47         if look=='amd64':
  48             return 'win-x86_64'
  49         if look=='itanium':
  50             return 'win-ia64'
  51         return sys.platform
  52
  53     if os.name != "posix" or not hasattr(os, 'uname'):
  54         # XXX what about the architecture? NT is Intel or Alpha,
  55         # Mac OS is M68k or PPC, etc.
  56         return sys.platform
  57
  58     # Try to distinguish various flavours of Unix
  59
  60     (osname, host, release, version, machine) = os.uname()
  61
  62     # Convert the OS name to lowercase, remove '/' characters
  63     # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
  64     osname = string.lower(osname)
  65     osname = string.replace(osname, '/', '')
  66     machine = string.replace(machine, ' ', '_')
  67     machine = string.replace(machine, '/', '-')
  68
  69     if osname[:5] == "linux":
  70         # At least on Linux/Intel, 'machine' is the processor --
  71         # i386, etc.
  72         # XXX what about Alpha, SPARC, etc?
  73         return  "%s-%s" % (osname, machine)
  74     elif osname[:5] == "sunos":
  75         if release[0] >= "5":           # SunOS 5 == Solaris 2
  76             osname = "solaris"
  77             release = "%d.%s" % (int(release[0]) - 3, release[2:])
  78         # fall through to standard osname-release-machine representation
  79     elif osname[:4] == "irix":              # could be "irix64"!
  80         return "%s-%s" % (osname, release)
  81     elif osname[:3] == "aix":
  82         return "%s-%s.%s" % (osname, version, release)
  83     elif osname[:6] == "cygwin":
  84         osname = "cygwin"
  85         rel_re = re.compile (r'[\d.]+')
  86         m = rel_re.match(release)
  87         if m:
  88             release = m.group()
  89     elif osname[:6] == "darwin":
  90         #
  91         # For our purposes, we'll assume that the system version from
  92         # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
  93         # to. This makes the compatibility story a bit more sane because the
  94         # machine is going to compile and link as if it were
  95         # MACOSX_DEPLOYMENT_TARGET.
  96         from distutils.sysconfig import get_config_vars
  97         cfgvars = get_config_vars()
  98
  99         macver = os.environ.get('MACOSX_DEPLOYMENT_TARGET')
 100         if not macver:
 101             macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')
 102
 103         if not macver:
 104             # Get the system version. Reading this plist is a documented
 105             # way to get the system version (see the documentation for
 106             # the Gestalt Manager)
 107             try:
 108                 f = open('/System/Library/CoreServices/SystemVersion.plist')
 109             except IOError:
 110                 # We're on a plain darwin box, fall back to the default
 111                 # behaviour.
 112                 pass
 113             else:
 114                 m = re.search(
 115                         r'<key>ProductUserVisibleVersion</key>\s*' +
 116                         r'<string>(.*?)</string>', f.read())
 117                 f.close()
 118                 if m is not None:
 119                     macver = '.'.join(m.group(1).split('.')[:2])
 120                 # else: fall back to the default behaviour
 121
 122         if macver:
 123             from distutils.sysconfig import get_config_vars
 124             release = macver
 125             osname = "macosx"
 126
 127
 128             if (release + '.') < '10.4.' and \
 129                     get_config_vars().get('UNIVERSALSDK', '').strip():
 130                 # The universal build will build fat binaries, but not on
 131                 # systems before 10.4
 132                 machine = 'fat'
 133
 134             elif machine in ('PowerPC', 'Power_Macintosh'):
 135                 # Pick a sane name for the PPC architecture.
 136                 machine = 'ppc'
 137
 138     return "%s-%s-%s" % (osname, release, machine)
 139
 140 # get_platform ()
 141
 142
 143 def convert_path (pathname):
 144     """Return 'pathname' as a name that will work on the native filesystem,
 145     i.e. split it on '/' and put it back together again using the current
 146     directory separator.  Needed because filenames in the setup script are
 147     always supplied in Unix style, and have to be converted to the local
 148     convention before we can actually use them in the filesystem.  Raises
 149     ValueError on non-Unix-ish systems if 'pathname' either starts or
 150     ends with a slash.
 151     """
 152     if os.sep == '/':
 153         return pathname
 154     if not pathname:
 155         return pathname
 156     if pathname[0] == '/':
 157         raise ValueError, "path '%s' cannot be absolute" % pathname
 158     if pathname[-1] == '/':
 159         raise ValueError, "path '%s' cannot end with '/'" % pathname
 160
 161     paths = string.split(pathname, '/')
 162     while '.' in paths:
 163         paths.remove('.')
 164     if not paths:
 165         return os.curdir
 166     return apply(os.path.join, paths)
 167
 168 # convert_path ()
 169
 170
 171 def change_root (new_root, pathname):
 172     """Return 'pathname' with 'new_root' prepended.  If 'pathname' is
 173     relative, this is equivalent to "os.path.join(new_root,pathname)".
 174     Otherwise, it requires making 'pathname' relative and then joining the
 175     two, which is tricky on DOS/Windows and Mac OS.
 176     """
 177     if os.name == 'posix':
 178         if not os.path.isabs(pathname):
 179             return os.path.join(new_root, pathname)
 180         else:
 181             return os.path.join(new_root, pathname[1:])
 182
 183     elif os.name == 'nt':
 184         (drive, path) = os.path.splitdrive(pathname)
 185         if path[0] == '\\':
 186             path = path[1:]
 187         return os.path.join(new_root, path)
 188
 189     elif os.name == 'os2':
 190         (drive, path) = os.path.splitdrive(pathname)
 191         if path[0] == os.sep:
 192             path = path[1:]
 193         return os.path.join(new_root, path)
 194
 195     elif os.name == 'mac':
 196         if not os.path.isabs(pathname):
 197             return os.path.join(new_root, pathname)
 198         else:
 199             # Chop off volume name from start of path
 200             elements = string.split(pathname, ":", 1)
 201             pathname = ":" + elements[1]
 202             return os.path.join(new_root, pathname)
 203
 204     else:
 205         raise DistutilsPlatformError, \
 206               "nothing known about platform '%s'" % os.name
 207
 208
 209 _environ_checked = 0
 210 def check_environ ():
 211     """Ensure that 'os.environ' has all the environment variables we
 212     guarantee that users can use in config files, command-line options,
 213     etc.  Currently this includes:
 214       HOME - user's home directory (Unix only)
 215       PLAT - description of the current platform, including hardware
 216              and OS (see 'get_platform()')
 217     """
 218     global _environ_checked
 219     if _environ_checked:
 220         return
 221
 222     if os.name == 'posix' and not os.environ.has_key('HOME'):
 223         import pwd
 224         os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
 225
 226     if not os.environ.has_key('PLAT'):
 227         os.environ['PLAT'] = get_platform()
 228
 229     _environ_checked = 1
 230
 231
 232 def subst_vars (s, local_vars):
 233     """Perform shell/Perl-style variable substitution on 'string'.  Every
 234     occurrence of '$' followed by a name is considered a variable, and
 235     variable is substituted by the value found in the 'local_vars'
 236     dictionary, or in 'os.environ' if it's not in 'local_vars'.
 237     'os.environ' is first checked/augmented to guarantee that it contains
 238     certain values: see 'check_environ()'.  Raise ValueError for any
 239     variables not found in either 'local_vars' or 'os.environ'.
 240     """
 241     check_environ()
 242     def _subst (match, local_vars=local_vars):
 243         var_name = match.group(1)
 244         if local_vars.has_key(var_name):
 245             return str(local_vars[var_name])
 246         else:
 247             return os.environ[var_name]
 248
 249     try:
 250         return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
 251     except KeyError, var:
 252         raise ValueError, "invalid variable '$%s'" % var
 253
 254 # subst_vars ()
 255
 256
 257 def grok_environment_error (exc, prefix="error: "):
 258     """Generate a useful error message from an EnvironmentError (IOError or
 259     OSError) exception object.  Handles Python 1.5.1 and 1.5.2 styles, and
 260     does what it can to deal with exception objects that don't have a
 261     filename (which happens when the error is due to a two-file operation,
 262     such as 'rename()' or 'link()'.  Returns the error message as a string
 263     prefixed with 'prefix'.
 264     """
 265     # check for Python 1.5.2-style {IO,OS}Error exception objects
 266     if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
 267         if exc.filename:
 268             error = prefix + "%s: %s" % (exc.filename, exc.strerror)
 269         else:
 270             # two-argument functions in posix module don't
 271             # include the filename in the exception object!
 272             error = prefix + "%s" % exc.strerror
 273     else:
 274         error = prefix + str(exc[-1])
 275
 276     return error
 277
 278
 279 # Needed by 'split_quoted()'
 280 _wordchars_re = _squote_re = _dquote_re = None
 281 def _init_regex():
 282     global _wordchars_re, _squote_re, _dquote_re
 283     _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
 284     _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
 285     _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')
 286
 287 def split_quoted (s):
 288     """Split a string up according to Unix shell-like rules for quotes and
 289     backslashes.  In short: words are delimited by spaces, as long as those
 290     spaces are not escaped by a backslash, or inside a quoted string.
 291     Single and double quotes are equivalent, and the quote characters can
 292     be backslash-escaped.  The backslash is stripped from any two-character
 293     escape sequence, leaving only the escaped character.  The quote
 294     characters are stripped from any quoted string.  Returns a list of
 295     words.
 296     """
 297
 298     # This is a nice algorithm for splitting up a single string, since it
 299     # doesn't require character-by-character examination.  It was a little
 300     # bit of a brain-bender to get it working right, though...
 301     if _wordchars_re is None: _init_regex()
 302
 303     s = string.strip(s)
 304     words = []
 305     pos = 0
 306
 307     while s:
 308         m = _wordchars_re.match(s, pos)
 309         end = m.end()
 310         if end == len(s):
 311             words.append(s[:end])
 312             break
 313
 314         if s[end] in string.whitespace: # unescaped, unquoted whitespace: now
 315             words.append(s[:end])       # we definitely have a word delimiter
 316             s = string.lstrip(s[end:])
 317             pos = 0
 318
 319         elif s[end] == '\\':            # preserve whatever is being escaped;
 320                                         # will become part of the current word
 321             s = s[:end] + s[end+1:]
 322             pos = end+1
 323
 324         else:
 325             if s[end] == "'":           # slurp singly-quoted string
 326                 m = _squote_re.match(s, end)
 327             elif s[end] == '"':         # slurp doubly-quoted string
 328                 m = _dquote_re.match(s, end)
 329             else:
 330                 raise RuntimeError, \
 331                       "this can't happen (bad char '%c')" % s[end]
 332
 333             if m is None:
 334                 raise ValueError, \
 335                       "bad string (mismatched %s quotes?)" % s[end]
 336
 337             (beg, end) = m.span()
 338             s = s[:beg] + s[beg+1:end-1] + s[end:]
 339             pos = m.end() - 2
 340
 341         if pos >= len(s):
 342             words.append(s)
 343             break
 344
 345     return words
 346
 347 # split_quoted ()
 348
 349
 350 def execute (func, args, msg=None, verbose=0, dry_run=0):
 351     """Perform some action that affects the outside world (eg.  by
 352     writing to the filesystem).  Such actions are special because they
 353     are disabled by the 'dry_run' flag.  This method takes care of all
 354     that bureaucracy for you; all you have to do is supply the
 355     function to call and an argument tuple for it (to embody the
 356     "external action" being performed), and an optional message to
 357     print.
 358     """
 359     if msg is None:
 360         msg = "%s%r" % (func.__name__, args)
 361         if msg[-2:] == ',)':        # correct for singleton tuple
 362             msg = msg[0:-2] + ')'
 363
 364     log.info(msg)
 365     if not dry_run:
 366         apply(func, args)
 367
 368
 369 def strtobool (val):
 370     """Convert a string representation of truth to true (1) or false (0).
 371
 372     True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
 373     are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
 374     'val' is anything else.
 375     """
 376     val = string.lower(val)
 377     if val in ('y', 'yes', 't', 'true', 'on', '1'):
 378         return 1
 379     elif val in ('n', 'no', 'f', 'false', 'off', '0'):
 380         return 0
 381     else:
 382         raise ValueError, "invalid truth value %r" % (val,)
 383
 384
 385 def byte_compile (py_files,
 386                   optimize=0, force=0,
 387                   prefix=None, base_dir=None,
 388                   verbose=1, dry_run=0,
 389                   direct=None):
 390     """Byte-compile a collection of Python source files to either .pyc
 391     or .pyo files in the same directory.  'py_files' is a list of files
 392     to compile; any files that don't end in ".py" are silently skipped.
 393     'optimize' must be one of the following:
 394       0 - don't optimize (generate .pyc)
 395       1 - normal optimization (like "python -O")
 396       2 - extra optimization (like "python -OO")
 397     If 'force' is true, all files are recompiled regardless of
 398     timestamps.
 399
 400     The source filename encoded in each bytecode file defaults to the
 401     filenames listed in 'py_files'; you can modify these with 'prefix' and
 402     'basedir'.  'prefix' is a string that will be stripped off of each
 403     source filename, and 'base_dir' is a directory name that will be
 404     prepended (after 'prefix' is stripped).  You can supply either or both
 405     (or neither) of 'prefix' and 'base_dir', as you wish.
 406
 407     If 'dry_run' is true, doesn't actually do anything that would
 408     affect the filesystem.
 409
 410     Byte-compilation is either done directly in this interpreter process
 411     with the standard py_compile module, or indirectly by writing a
 412     temporary script and executing it.  Normally, you should let
 413     'byte_compile()' figure out to use direct compilation or not (see
 414     the source for details).  The 'direct' flag is used by the script
 415     generated in indirect mode; unless you know what you're doing, leave
 416     it set to None.
 417     """
 418
 419     # First, if the caller didn't force us into direct or indirect mode,
 420     # figure out which mode we should be in.  We take a conservative
 421     # approach: choose direct mode *only* if the current interpreter is
 422     # in debug mode and optimize is 0.  If we're not in debug mode (-O
 423     # or -OO), we don't know which level of optimization this
 424     # interpreter is running with, so we can't do direct
 425     # byte-compilation and be certain that it's the right thing.  Thus,
 426     # always compile indirectly if the current interpreter is in either
 427     # optimize mode, or if either optimization level was requested by
 428     # the caller.
 429     if direct is None:
 430         direct = (__debug__ and optimize == 0)
 431
 432     # "Indirect" byte-compilation: write a temporary script and then
 433     # run it with the appropriate flags.
 434     if not direct:
 435         try:
 436             from tempfile import mkstemp
 437             (script_fd, script_name) = mkstemp(".py")
 438         except ImportError:
 439             from tempfile import mktemp
 440             (script_fd, script_name) = None, mktemp(".py")
 441         log.info("writing byte-compilation script '%s'", script_name)
 442         if not dry_run:
 443             if script_fd is not None:
 444                 script = os.fdopen(script_fd, "w")
 445             else:
 446                 script = open(script_name, "w")
 447
 448             script.write("""\
 449 from distutils.util import byte_compile
 450 files = [
 451 """)
 452
 453             # XXX would be nice to write absolute filenames, just for
 454             # safety's sake (script should be more robust in the face of
 455             # chdir'ing before running it).  But this requires abspath'ing
 456             # 'prefix' as well, and that breaks the hack in build_lib's
 457             # 'byte_compile()' method that carefully tacks on a trailing
 458             # slash (os.sep really) to make sure the prefix here is "just
 459             # right".  This whole prefix business is rather delicate -- the
 460             # problem is that it's really a directory, but I'm treating it
 461             # as a dumb string, so trailing slashes and so forth matter.
 462
 463             #py_files = map(os.path.abspath, py_files)
 464             #if prefix:
 465             #    prefix = os.path.abspath(prefix)
 466
 467             script.write(string.join(map(repr, py_files), ",\n") + "]\n")
 468             script.write("""
 469 byte_compile(files, optimize=%r, force=%r,
 470              prefix=%r, base_dir=%r,
 471              verbose=%r, dry_run=0,
 472              direct=1)
 473 """ % (optimize, force, prefix, base_dir, verbose))
 474
 475             script.close()
 476
 477         cmd = [sys.executable, script_name]
 478         if optimize == 1:
 479             cmd.insert(1, "-O")
 480         elif optimize == 2:
 481             cmd.insert(1, "-OO")
 482         spawn(cmd, dry_run=dry_run)
 483         execute(os.remove, (script_name,), "removing %s" % script_name,
 484                 dry_run=dry_run)
 485
 486     # "Direct" byte-compilation: use the py_compile module to compile
 487     # right here, right now.  Note that the script generated in indirect
 488     # mode simply calls 'byte_compile()' in direct mode, a weird sort of
 489     # cross-process recursion.  Hey, it works!
 490     else:
 491         from py_compile import compile
 492
 493         for file in py_files:
 494             if file[-3:] != ".py":
 495                 # This lets us be lazy and not filter filenames in
 496                 # the "install_lib" command.
 497                 continue
 498
 499             # Terminology from the py_compile module:
 500             #   cfile - byte-compiled file
 501             #   dfile - purported source filename (same as 'file' by default)
 502             cfile = file + (__debug__ and "c" or "o")
 503             dfile = file
 504             if prefix:
 505                 if file[:len(prefix)] != prefix:
 506                     raise ValueError, \
 507                           ("invalid prefix: filename %r doesn't start with %r"
 508                            % (file, prefix))
 509                 dfile = dfile[len(prefix):]
 510             if base_dir:
 511                 dfile = os.path.join(base_dir, dfile)
 512
 513             cfile_base = os.path.basename(cfile)
 514             if direct:
 515                 if force or newer(file, cfile):
 516                     log.info("byte-compiling %s to %s", file, cfile_base)
 517                     if not dry_run:
 518                         compile(file, cfile, dfile)
 519                 else:
 520                     log.debug("skipping byte-compilation of %s to %s",
 521                               file, cfile_base)
 522
 523 # byte_compile ()
 524
 525 def rfc822_escape (header):
 526     """Return a version of the string escaped for inclusion in an
 527     RFC-822 header, by ensuring there are 8 spaces space after each newline.
 528     """
 529     lines = string.split(header, '\n')
 530     lines = map(string.strip, lines)
 531     header = string.join(lines, '\n' + 8*' ')
 532     return header