Add missing issue number in Misc/NEWS entry.
[python.git] / Lib / distutils / util.py
blob6bff44f786d344a868dcadd4fc16f90f66421f72
1 """distutils.util
3 Miscellaneous utility functions -- anything that doesn't fit into
4 one of the other *util.py modules.
5 """
7 __revision__ = "$Id$"
9 import sys, os, string, re
11 from distutils.errors import DistutilsPlatformError
12 from distutils.dep_util import newer
13 from distutils.spawn import spawn, find_executable
14 from distutils import log
15 from distutils.version import LooseVersion
16 from distutils.errors import DistutilsByteCompileError
def get_platform():
    """Return a string that identifies the current platform.

    This is used mainly to distinguish platform-specific build directories and
    platform-specific built distributions.  Typically includes the OS name
    and version and the architecture (as supplied by 'os.uname()'),
    although the exact information included depends on the OS; eg. for IRIX
    the architecture isn't particularly important (IRIX only runs on SGI
    hardware), but for Linux the kernel version isn't particularly
    important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    Windows will return one of:
       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
       win-ia64 (64bit Windows on Itanium)
       win32 (all others - specifically, sys.platform is returned)

    For other non-POSIX platforms, currently just returns 'sys.platform'.

    Raises ValueError on Mac OS X when the '-arch' flags found in CFLAGS
    form a combination that has no known machine name.
    """
    if os.name == 'nt':
        # sniff sys.version for architecture.
        prefix = " bit ("
        i = sys.version.find(prefix)
        if i == -1:
            return sys.platform
        j = sys.version.find(")", i)
        look = sys.version[i+len(prefix):j].lower()
        if look == 'amd64':
            return 'win-amd64'
        if look == 'itanium':
            return 'win-ia64'
        return sys.platform

    if os.name != "posix" or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix

    (osname, host, release, version, machine) = os.uname()

    # Convert the OS name to lowercase, remove '/' characters
    # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_')
    machine = machine.replace('/', '-')

    if osname[:5] == "linux":
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    elif osname[:5] == "sunos":
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
        # fall through to standard osname-release-machine representation
    elif osname[:4] == "irix":              # could be "irix64"!
        return "%s-%s" % (osname, release)
    elif osname[:3] == "aix":
        return "%s-%s.%s" % (osname, version, release)
    elif osname[:6] == "cygwin":
        osname = "cygwin"
        rel_re = re.compile(r'[\d.]+')
        m = rel_re.match(release)
        if m:
            release = m.group()
    elif osname[:6] == "darwin":
        # For our purposes, we'll assume that the system version from
        # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
        # to. This makes the compatibility story a bit more sane because the
        # machine is going to compile and link as if it were
        # MACOSX_DEPLOYMENT_TARGET.
        from distutils.sysconfig import get_config_vars
        cfgvars = get_config_vars()

        macver = os.environ.get('MACOSX_DEPLOYMENT_TARGET')
        if not macver:
            macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')

        # Always calculate the release of the running machine,
        # needed to determine if we can build fat binaries or not.
        macrelease = macver
        # Get the system version. Reading this plist is a documented
        # way to get the system version (see the documentation for
        # the Gestalt Manager)
        try:
            f = open('/System/Library/CoreServices/SystemVersion.plist')
        except IOError:
            # We're on a plain darwin box, fall back to the default
            # behaviour.
            pass
        else:
            try:
                m = re.search(
                        r'<key>ProductUserVisibleVersion</key>\s*' +
                        r'<string>(.*?)</string>', f.read())
            finally:
                # Close the handle even if reading the plist fails.
                f.close()
            if m is not None:
                macrelease = '.'.join(m.group(1).split('.')[:2])
            # else: fall back to the default behaviour

        if not macver:
            macver = macrelease

        if macver:
            release = macver
            osname = "macosx"

            # Compare the release numerically: as strings, '10.10.' sorts
            # before '10.4.', which would wrongly disable fat-binary
            # detection on 10.10 and later.
            try:
                release_info = tuple(
                    [int(part) for part in macrelease.split('.')[:2]])
            except ValueError:
                # Unparseable release string: treat it as pre-10.4.
                release_info = (10, 0)

            cflags = cfgvars.get('CFLAGS', '')
            if release_info >= (10, 4) and '-arch' in cflags.strip():
                # The universal build will build fat binaries, but not on
                # systems before 10.4
                #
                # Try to detect 4-way universal builds, those have machine-type
                # 'universal' instead of 'fat'.

                machine = 'fat'

                # De-duplicate so a repeated '-arch X' flag is not mistaken
                # for an unknown multi-architecture combination.
                archs = tuple(sorted(set(re.findall(r'-arch\s+(\S+)',
                                                    cflags))))

                if len(archs) == 1:
                    machine = archs[0]
                elif archs == ('i386', 'ppc'):
                    machine = 'fat'
                elif archs == ('i386', 'x86_64'):
                    machine = 'intel'
                elif archs == ('i386', 'ppc', 'x86_64'):
                    machine = 'fat3'
                elif archs == ('ppc64', 'x86_64'):
                    machine = 'fat64'
                elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
                    machine = 'universal'
                else:
                    raise ValueError(
                       "Don't know machine value for archs=%r"%(archs,))

            elif machine in ('PowerPC', 'Power_Macintosh'):
                # Pick a sane name for the PPC architecture.
                machine = 'ppc'

    return "%s-%s-%s" % (osname, release, machine)
def convert_path(pathname):
    """Translate a '/'-separated setup-script path into a native path.

    Setup scripts always spell filenames Unix style; this splits
    'pathname' on '/' and rejoins the pieces with the local directory
    separator so the result can be used on the running filesystem.  Any
    '.' components are dropped, and a path consisting only of '.'
    components becomes 'os.curdir'.

    When the native separator already is '/', or 'pathname' is empty,
    the argument is returned untouched.  On other systems ValueError is
    raised if 'pathname' starts or ends with a slash.
    """
    # Nothing to translate on Unix-like systems or for an empty name.
    if os.sep == '/' or not pathname:
        return pathname

    if pathname.startswith('/'):
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname.endswith('/'):
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    # Keep every component except the no-op '.' entries.
    components = [part for part in pathname.split('/') if part != '.']
    if components:
        return os.path.join(*components)
    return os.curdir
def change_root(new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.

    If 'pathname' is relative, this is equivalent to
    "os.path.join(new_root,pathname)".
    Otherwise, it requires making 'pathname' relative and then joining the
    two, which is tricky on DOS/Windows and Mac OS.

    Raises DistutilsPlatformError when 'os.name' is not one of 'posix',
    'nt', 'os2' or 'mac'.
    """
    if os.name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            # Strip *every* leading slash: if one were left behind (as for
            # '//usr/lib'), os.path.join() would treat the remainder as
            # absolute and silently discard 'new_root'.
            return os.path.join(new_root, pathname.lstrip('/'))

    elif os.name == 'nt':
        (drive, path) = os.path.splitdrive(pathname)
        # Slice instead of indexing so an empty path (e.g. a bare drive
        # such as 'C:') does not raise IndexError.
        if path[:1] == '\\':
            path = path[1:]
        return os.path.join(new_root, path)

    elif os.name == 'os2':
        (drive, path) = os.path.splitdrive(pathname)
        # Same empty-path guard as the 'nt' branch.
        if path[:1] == os.sep:
            path = path[1:]
        return os.path.join(new_root, path)

    elif os.name == 'mac':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        else:
            # Chop off volume name from start of path
            elements = pathname.split(":", 1)
            pathname = ":" + elements[1]
            return os.path.join(new_root, pathname)

    else:
        raise DistutilsPlatformError("nothing known about "
                                     "platform '%s'" % os.name)
# Set to a true value once check_environ() has done its work, so that
# later calls return immediately.
_environ_checked = 0

def check_environ():
    """Ensure that 'os.environ' has all the environment variables needed.

    These are the variables we guarantee that users can use in config
    files, command-line options, etc.  Currently this includes:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    Variables that are already set are left alone.  The work is done only
    once per process; subsequent calls are no-ops.
    """
    global _environ_checked
    if _environ_checked:
        return

    if os.name == 'posix' and 'HOME' not in os.environ:
        try:
            import pwd
            os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
        except (ImportError, KeyError):
            # The current uid may have no entry in the password database
            # (getpwuid() then raises KeyError), or 'pwd' may be missing.
            # HOME is a convenience only, so leave it unset rather than
            # failing.
            pass

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = 1
def subst_vars(s, local_vars):
    """Perform shell/Perl-style variable substitution on the string 's'.

    Every occurrence of '$' followed by a name is considered a variable, and
    the variable is substituted by the value found in the 'local_vars'
    dictionary (converted with str()), or in 'os.environ' if it's not in
    'local_vars'.  'os.environ' is first checked/augmented to guarantee
    that it contains certain values: see 'check_environ()'.  Raise
    ValueError for any variables not found in either 'local_vars' or
    'os.environ'.
    """
    check_environ()

    def _subst(match):
        var_name = match.group(1)
        if var_name in local_vars:
            return str(local_vars[var_name])
        else:
            return os.environ[var_name]

    try:
        return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _subst, s)
    except KeyError as exc:
        # Report the bare variable name: formatting the KeyError itself
        # would wrap the name in an extra pair of quotes.
        raise ValueError("invalid variable '$%s'" % exc.args[0])
def grok_environment_error(exc, prefix="error: "):
    """Generate a useful error message from an EnvironmentError.

    'exc' is normally an IOError or OSError exception object.  When it
    carries both 'filename' and 'strerror' attributes the message is
    "<filename>: <strerror>", or just "<strerror>" if the filename is
    empty (which happens when the error is due to a two-file operation,
    such as 'rename()' or 'link()').  For any other exception object the
    last element of 'exc.args' is used, falling back to 'str(exc)' when
    there are no arguments.  Returns the error message as a string
    prefixed with 'prefix'.
    """
    if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
        if exc.filename:
            error = prefix + "%s: %s" % (exc.filename, exc.strerror)
        else:
            # two-argument functions in posix module don't
            # include the filename in the exception object!
            error = prefix + "%s" % exc.strerror
    elif exc.args:
        # Go through 'args': indexing the exception object itself is not
        # supported on Python 3.
        error = prefix + str(exc.args[-1])
    else:
        error = prefix + str(exc)

    return error
# Regular expressions used by 'split_quoted()'; compiled on first use by
# '_init_regex()'.
_wordchars_re = _squote_re = _dquote_re = None

def _init_regex():
    """Compile the module-level patterns that 'split_quoted()' relies on."""
    global _wordchars_re, _squote_re, _dquote_re
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')

def split_quoted(s):
    """Split a string into words using Unix shell-like quoting rules.

    Words are separated by whitespace unless that whitespace is escaped
    with a backslash or sits inside a quoted string.  Single and double
    quotes behave the same, and a quote character may itself be
    backslash-escaped.  For every two-character escape sequence the
    backslash is dropped and the escaped character kept; the enclosing
    quote characters of a quoted string are dropped as well.  Returns the
    list of words.

    Raises ValueError if a quote is opened but never closed.
    """
    # The string is consumed with regex matches rather than character by
    # character: 's' always holds the current (partial) word followed by
    # the unprocessed remainder, and 'pos' is where scanning resumes.
    if _wordchars_re is None:
        _init_regex()

    s = s.strip()
    words = []
    pos = 0

    while s:
        end = _wordchars_re.match(s, pos).end()
        if end == len(s):
            # Ordinary characters run to the end: that is the last word.
            words.append(s[:end])
            break

        stopper = s[end]
        if stopper in string.whitespace:
            # Unescaped, unquoted whitespace: a definite word delimiter.
            words.append(s[:end])
            s = s[end:].lstrip()
            pos = 0

        elif stopper == '\\':
            # Drop the backslash and step over the character it protects,
            # which stays part of the current word.
            s = s[:end] + s[end+1:]
            pos = end+1

        else:
            if stopper == "'":          # slurp singly-quoted string
                quoted = _squote_re.match(s, end)
            elif stopper == '"':        # slurp doubly-quoted string
                quoted = _dquote_re.match(s, end)
            else:
                raise RuntimeError("this can't happen "
                                   "(bad char '%c')" % stopper)

            if quoted is None:
                raise ValueError("bad string (mismatched %s quotes?)" % stopper)

            (beg, end) = quoted.span()
            # Splice the quoted text back in without its two quote marks;
            # scanning resumes right after it (hence the '- 2').
            s = s[:beg] + s[beg+1:end-1] + s[end:]
            pos = quoted.end() - 2

        if pos >= len(s):
            words.append(s)
            break

    return words
def execute(func, args, msg=None, verbose=0, dry_run=0):
    """Perform some action that affects the outside world (eg. by
    writing to the filesystem).

    Such actions are special because they are disabled by the 'dry_run'
    flag.  The caller supplies the function to call and a tuple of
    arguments for it (together embodying the "external action"), plus an
    optional message to log.  When 'msg' is omitted, one is built from the
    function name and the argument tuple.  The message is logged at info
    level whether or not the action is actually carried out.

    'verbose' is accepted but not used by this function.
    """
    if msg is None:
        msg = "%s%r" % (func.__name__, args)
        # repr() of a one-element tuple ends in ',)'; tidy it to ')'.
        if msg.endswith(',)'):
            msg = msg[:-2] + ')'

    log.info(msg)
    if dry_run:
        return
    func(*args)
def strtobool(val):
    """Convert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
    are 'n', 'no', 'f', 'false', 'off', and '0'.  The comparison is
    case-insensitive.  Raises ValueError if 'val' is anything else.
    """
    val = val.lower()
    if val in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    elif val in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    else:
        # Call-style raise: valid on Python 2 and 3, and consistent with
        # every other raise in this module.
        raise ValueError("invalid truth value %r" % (val,))
def byte_compile(py_files, optimize=0, force=0, prefix=None, base_dir=None,
                 verbose=1, dry_run=0, direct=None):
    """Byte-compile a collection of Python source files to either .pyc
    or .pyo files in the same directory.

    'py_files' is a list of files to compile; any files that don't end in
    ".py" are silently skipped.  'optimize' must be one of the following:
      0 - don't optimize (generate .pyc)
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out to use direct compilation or not (see
    the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is set,
    and ValueError if 'prefix' is given but a filename does not start
    with it.
    """
    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        from tempfile import mkstemp
        (script_fd, script_name) = mkstemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)
        if dry_run:
            # Nothing will be written or run, so release the descriptor
            # and the placeholder file mkstemp() created right away; a
            # dry run must not leave anything behind.
            os.close(script_fd)
            os.remove(script_name)
        else:
            script = os.fdopen(script_fd, "w")
            try:
                script.write("""\
from distutils.util import byte_compile
files = [
""")

                # XXX would be nice to write absolute filenames, just for
                # safety's sake (script should be more robust in the face of
                # chdir'ing before running it).  But this requires abspath'ing
                # 'prefix' as well, and that breaks the hack in build_lib's
                # 'byte_compile()' method that carefully tacks on a trailing
                # slash (os.sep really) to make sure the prefix here is "just
                # right".  This whole prefix business is rather delicate -- the
                # problem is that it's really a directory, but I'm treating it
                # as a dumb string, so trailing slashes and so forth matter.

                script.write(",\n".join(map(repr, py_files)) + "]\n")
                script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))
            finally:
                # Close the script even if one of the writes fails.
                script.close()

        cmd = [sys.executable, script_name]
        if optimize == 1:
            cmd.insert(1, "-O")
        elif optimize == 2:
            cmd.insert(1, "-OO")
        try:
            spawn(cmd, dry_run=dry_run)
        finally:
            # Remove the temporary script whether or not the child
            # interpreter succeeded, so failures don't litter the temp dir.
            execute(os.remove, (script_name,), "removing %s" % script_name,
                    dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for py_file in py_files:
            if py_file[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'py_file' by
            #           default)
            if __debug__:
                cfile = py_file + "c"
            else:
                cfile = py_file + "o"
            dfile = py_file
            if prefix:
                if py_file[:len(prefix)] != prefix:
                    raise ValueError("invalid prefix: filename %r doesn't "
                                     "start with %r" % (py_file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(py_file, cfile):
                log.info("byte-compiling %s to %s", py_file, cfile_base)
                if not dry_run:
                    compile(py_file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          py_file, cfile_base)
def rfc822_escape(header):
    """Return 'header' escaped for inclusion in an RFC-822 header.

    The text is split on newlines, each line is stripped of surrounding
    whitespace, and the lines are rejoined so that every newline is
    followed by 8 spaces.
    """
    continuation = '\n' + 8*' '
    stripped_lines = []
    for line in header.split('\n'):
        stripped_lines.append(line.strip())
    return continuation.join(stripped_lines)
# Dotted version number such as '4.2' or '2.20.51'; group 1 is the whole
# version string.
_RE_VERSION = re.compile(r'(\d+\.\d+(\.\d+)*)')
# Version banner of the Mac OS X linker ('ld -v'); group 1 is the ld64
# version.  Any run of whitespace is accepted between the PROGRAM and
# PROJECT fields so the match does not hinge on the exact spacing.
_MAC_OS_X_LD_VERSION = re.compile(
    r'^@\(#\)PROGRAM:ld\s+PROJECT:ld64-((\d+)(\.\d+)*)')
def _find_ld_version():
    """Return the version of 'ld' as reported by 'ld -v'.

    On Mac OS X ('sys.platform' is 'darwin') the output follows a
    different scheme, so it is parsed with _MAC_OS_X_LD_VERSION instead of
    the default pattern of '_find_exe_version()'.
    """
    if sys.platform != 'darwin':
        return _find_exe_version('ld -v')
    return _find_exe_version('ld -v', _MAC_OS_X_LD_VERSION)
def _find_exe_version(cmd, pattern=_RE_VERSION):
    """Find the version of an executable by running `cmd` in the shell.

    `pattern` is a compiled regular expression whose first group captures
    the version.  If not provided, default to _RE_VERSION.  If the command
    is not found, or the output does not match the pattern, returns None;
    otherwise returns a LooseVersion built from the captured text.
    """
    from subprocess import Popen, PIPE
    executable = cmd.split()[0]
    if find_executable(executable) is None:
        return None
    pipe = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
    # communicate() drains both pipes together and waits for the child.
    # Reading stdout to EOF before touching stderr can deadlock once the
    # child fills the stderr pipe, and never reaps the process.
    stdout, stderr = pipe.communicate()
    # some commands like ld under MacOS X, will give the
    # output in the stderr, rather than stdout.
    if stdout:
        out_string = stdout
    else:
        out_string = stderr

    result = pattern.search(out_string)
    if result is None:
        return None
    return LooseVersion(result.group(1))
def get_compiler_versions():
    """Return the versions of gcc, ld and dllwrap as a 3-tuple.

    Each element is whatever the lookup helper yields for that tool: None
    when the command is not found, otherwise a LooseVersion instance.
    """
    gcc_version = _find_exe_version('gcc -dumpversion')
    ld_version = _find_ld_version()
    dllwrap_version = _find_exe_version('dllwrap --version')
    return (gcc_version, ld_version, dllwrap_version)