Merged revisions 81181 via svnmerge from
[python/dscho.git] / Lib / ntpath.py
blob1cec8954d036972e7a9bbb71819eddf090571597
# Module 'ntpath' -- common operations on WinNT/Win95 pathnames
"""Common pathname manipulations, WindowsNT/95 version.

Instead of importing this module directly, import os and refer to this
module as os.path.
"""
8 import os
9 import sys
10 import stat
11 import genericpath
12 from genericpath import *
# Public names exported by "from ntpath import *".  Names listed here that
# are not defined in this file (e.g. commonprefix, getsize, exists, isdir)
# are presumably supplied by the "from genericpath import *" above.
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime", "islink","exists","lexists","isdir","isfile",
           "ismount", "expanduser","expandvars","normpath","abspath",
           "splitunc","curdir","pardir","sep","pathsep","defpath","altsep",
           "extsep","devnull","realpath","supports_unicode_filenames","relpath"]
# strings representing various path-related bits and pieces
# These are primarily for export; internally, they are hardcoded.
curdir = '.'            # name of the current directory
pardir = '..'           # name of the parent directory
extsep = '.'            # separates a file name from its extension
sep = '\\'              # primary path component separator
pathsep = ';'           # separates entries in a search path list
altsep = '/'            # alternative path component separator
defpath = '.;C:\\bin'   # default search path
if 'ce' in sys.builtin_module_names:
    # NOTE(review): the 'ce' builtin module presumably identifies a
    # Windows CE build -- confirm.
    defpath = '\\Windows'
elif 'os2' in sys.builtin_module_names:
    # OS/2 w/ VACPP
    # (re-assigns the value altsep already has above)
    altsep = '/'
devnull = 'nul'         # name of the null device
def _get_empty(path):
    """Return an empty string of the same kind (bytes or str) as *path*."""
    return b'' if isinstance(path, bytes) else ''
def _get_sep(path):
    """Return the backslash separator as bytes or str, matching *path*."""
    return b'\\' if isinstance(path, bytes) else '\\'
def _get_altsep(path):
    """Return the forward-slash separator as bytes or str, matching *path*."""
    return b'/' if isinstance(path, bytes) else '/'
def _get_bothseps(path):
    """Return both separators (backslash, then slash) typed like *path*."""
    return b'\\/' if isinstance(path, bytes) else '\\/'
def _get_dot(path):
    """Return a single dot as bytes or str, matching the type of *path*."""
    return b'.' if isinstance(path, bytes) else '.'
def _get_colon(path):
    """Return a colon as bytes or str, matching the type of *path*."""
    return b':' if isinstance(path, bytes) else ':'
# Normalize the case of a pathname and map slashes to backslashes.
# Other normalizations (such as optimizing '../' away) are not done
# (this is done by normpath).

def normcase(s):
    """Normalize case of pathname.

    Every forward slash becomes a backslash and every character is
    lowercased.  Works on both str and bytes paths.
    """
    backslashed = s.replace(_get_altsep(s), _get_sep(s))
    return backslashed.lower()
# Return whether a path is absolute.
# Trivial in Posix, harder on Windows.
# For Windows it is absolute if it starts with a slash or backslash (current
# volume), or if a pathname after the volume-letter-and-colon or UNC-resource
# starts with a slash or backslash.

def isabs(s):
    """Test whether a path is absolute.

    The drive letter or UNC share is stripped first; the path is absolute
    when what remains begins with a slash or a backslash.
    """
    rest = splitdrive(s)[1]
    if not rest:
        return False
    return rest[:1] in _get_bothseps(rest)
# Join two (or more) paths.

def join(a, *p):
    """Join two or more pathname components, inserting a backslash as needed.

    If any component is an absolute path, all previous path components
    will be discarded (except for a bare drive or UNC share, which is kept
    when the absolute component itself has no drive).
    """
    sep = _get_sep(a)
    seps = _get_bothseps(a)
    colon = _get_colon(a)
    result = a
    for part in p:
        # Decide whether `part` replaces everything accumulated so far.
        if not result:
            replaces = True
        elif isabs(part):
            # An absolute component usually wipes out the result so far, but
            # not when the result is only a drive/share.  The two are glued
            # together (minus redundant slashes) in these four cases:
            #         1. join('c:', '/a') == 'c:/a'
            #         2. join('//computer/share', '/a') == '//computer/share/a'
            #         3. join('c:/', '/a') == 'c:/a'
            #         4. join('//computer/share/', '/a') == '//computer/share/a'
            # whereas the new component wins in all of these:
            #         5. join('c:/a', '/b') == '/b'
            #         6. join('//computer/share/a', '/b') == '/b'
            #         7. join('c:', 'd:/') == 'd:/'
            #         8. join('c:', '//computer/share/') == '//computer/share/'
            #         9. join('//computer/share', 'd:/') == 'd:/'
            #        10. join('//computer/share', '//computer/share/') == '//computer/share/'
            #        11. join('c:/', 'd:/') == 'd:/'
            #        12. join('c:/', '//computer/share/') == '//computer/share/'
            #        13. join('//computer/share/', 'd:/') == 'd:/'
            #        14. join('//computer/share/', '//computer/share/') == '//computer/share/'
            part_drive, part_tail = splitdrive(part)
            if part_drive:
                # A component carrying its own drive/share always wins.
                replaces = True
            else:
                # No drive, yet isabs() said yes: the component must start
                # with a separator.  Double-check that assumption.
                assert(part_tail and part_tail[0] in seps)

                # The component wins only if the result has something after
                # its drive that is more than a lone separator (cases 5-6);
                # otherwise the two are concatenated (cases 1-4).
                result_tail = splitdrive(result)[1]
                replaces = bool(result_tail) and result_tail not in seps
        else:
            replaces = False

        if replaces:
            result = part
            continue

        # Concatenate, making sure exactly one separator sits in between.
        assert len(result) > 0
        last = result[-1:]
        if last in seps:
            if part and part[:1] in seps:
                result += part[1:]
            else:
                result += part
        elif last == colon:
            result += part
        elif part:
            if part[:1] in seps:
                result += part
            else:
                result += sep + part
        else:
            # The result is non-empty and does not end with a separator, but
            # the component is empty; since, e.g., split('a/') produces
            # ('a', ''), it's best if join() adds a backslash in this case.
            result += sep

    return result
# Split a path in a drive specification (a drive letter followed by a
# colon) and the path specification.
# It is always true that drivespec + pathspec == p
def splitdrive(p):
    """Split a pathname into drive/UNC sharepoint and relative path specifiers.
    Returns a 2-tuple (drive_or_unc, path); either part may be empty.

    If you assign
        result = splitdrive(p)
    It is always true that:
        result[0] + result[1] == p

    If the path contained a drive letter, drive_or_unc will contain everything
    up to and including the colon.  e.g. splitdrive("c:/dir") returns ("c:", "/dir")

    If the path contained a UNC path, the drive_or_unc will contain the host name
    and share up to but not including the fourth directory separator character.
    e.g. splitdrive("//host/computer/dir") returns ("//host/computer", "/dir")

    Paths cannot contain both a drive letter and a UNC path.

    """
    empty = _get_empty(p)
    if len(p) > 1:
        sep = _get_sep(p)
        # Only the separators are normalized.  The indices found in normp
        # are used to slice p, so normp must have exactly the same length
        # as p; lowercasing (as normcase() does) can change the length of
        # some strings, e.g. U+0130 lowercases to two code points.
        normp = p.replace(_get_altsep(p), sep)
        if (normp[0:2] == sep*2) and (normp[2:3] != sep):
            # is a UNC path:
            # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
            # \\machine\mountpoint\directory\etc\...
            #           directory ^^^^^^^^^^^^^^^
            index = normp.find(sep, 2)
            if index == -1:
                # Host name only, no share: not a usable UNC prefix.
                return empty, p
            index2 = normp.find(sep, index + 1)
            # a UNC path can't have two slashes in a row
            # (after the initial two)
            if index2 == index + 1:
                return empty, p
            if index2 == -1:
                # Nothing after the share: the whole path is the prefix.
                index2 = len(p)
            return p[:index2], p[index2:]
        if normp[1:2] == _get_colon(p):
            return p[:2], p[2:]
    return empty, p
# Parse UNC paths
def splitunc(p):
    """Deprecated since Python 3.1.  Please use splitdrive() instead;
    it now handles UNC paths.

    Split a pathname into UNC mount point and relative path specifiers.

    Return a 2-tuple (unc, rest); either part may be empty.
    If unc is not empty, it has the form '//host/mount' (or similar
    using backslashes).  unc+rest is always the input path.
    Paths containing drive letters never have an UNC part.
    """
    import warnings
    # stacklevel=2 attributes the warning to the caller of splitunc().
    warnings.warn("ntpath.splitunc is deprecated, use ntpath.splitdrive instead",
                  PendingDeprecationWarning, stacklevel=2)
    sep = _get_sep(p)
    if not p[1:2]:
        # Fewer than two characters: cannot start with two separators.
        return p[:0], p
    # Only the separators are normalized.  Indices found in normp are used
    # to slice p, so the two must have the same length; lowercasing (as
    # normcase() does) can change the length of some strings.
    normp = p.replace(_get_altsep(p), sep)
    if normp[0:2] == sep + sep:
        # is a UNC path:
        # vvvvvvvvvvvvvvvvvvvv equivalent to drive letter
        # \\machine\mountpoint\directories...
        #           directory ^^^^^^^^^^^^^^^
        index = normp.find(sep, 2)
        if index == -1:
            # Host name without a mount point: treated as having no UNC part.
            return (p[:0], p)
        index = normp.find(sep, index + 1)
        if index == -1:
            # Nothing after the mount point: the whole path is the UNC part.
            index = len(p)
        return p[:index], p[index:]
    return p[:0], p
# Split a path in head (everything up to the last '/') and tail (the
# rest).  After the trailing '/' is stripped, the invariant
# join(head, tail) == p holds.
# The resulting head won't end in '/' unless it is the root.

def split(p):
    """Split a pathname.

    Return tuple (head, tail) where tail is everything after the final slash.
    Either part may be empty.  The drive or UNC share, if any, stays at the
    front of head.
    """
    seps = _get_bothseps(p)
    drive, rest = splitdrive(p)
    # Walk back from the end until just past the last separator.
    cut = len(rest)
    while cut and rest[cut-1] not in seps:
        cut -= 1
    head, tail = rest[:cut], rest[cut:]  # tail now holds no separators
    # Drop trailing separators from head, unless head is nothing but
    # separators (i.e. it is the root).
    head = head.rstrip(seps) or head
    return drive + head, tail
# Split a path in root and extension.
# The extension is everything starting at the last dot in the last
# pathname component; the root is everything before that.
# It is always true that root + ext == p.

def splitext(p):
    # Delegates to the shared implementation, passing separators and the
    # extension dot in the same type (bytes or str) as p.
    sep, altsep, dot = _get_sep(p), _get_altsep(p), _get_dot(p)
    return genericpath._splitext(p, sep, altsep, dot)
splitext.__doc__ = genericpath._splitext.__doc__
# Return the tail (basename) part of a path.

def basename(p):
    """Returns the final component of a pathname"""
    _head, tail = split(p)
    return tail
# Return the head (dirname) part of a path.

def dirname(p):
    """Returns the directory component of a pathname"""
    head, _tail = split(p)
    return head
# Is a path a symbolic link?
# This will always return false on systems where posix.lstat doesn't exist.

def islink(path):
    """Test for symbolic link.

    On WindowsNT/95 and OS/2 always returns false; the argument is not
    inspected.
    """
    return False
# lexists is an alias for exists: islink() in this module always returns
# False, so there are no links to treat specially.  (exists itself is not
# defined in this file; presumably it comes from the genericpath star import.)
lexists = exists
# Is a path a mount point?  Either a root (with or without drive letter)
# or an UNC path with at most a / or \ after the mount point.

def ismount(path):
    """Test whether a path is a mount point (defined as root of drive).

    Returns True for a drive root ("c:\\" or "\\"), and for a UNC share
    with at most one trailing separator ("\\\\host\\share" or
    "\\\\host\\share\\").  Returns False for everything else, including the
    empty path and a bare drive letter such as "c:".
    """
    seps = _get_bothseps(path)
    root, rest = splitdrive(path)
    # `rest in seps` alone is a substring test, which an empty `rest`
    # satisfies; require exactly one separator character instead.
    at_root = len(rest) == 1 and rest in seps
    if root and root[0] in seps:
        # UNC share: the share itself, with or without a trailing separator.
        return (not rest) or at_root
    return at_root
# Expand paths beginning with '~' or '~user'.
# '~' means $HOME; '~user' means that user's home directory.
# If the path doesn't begin with '~', or if the user or $HOME is unknown,
# the path is returned unchanged (leaving error reporting to whatever
# function is called with the expanded path as argument).
# See also module 'glob' for expansion of *, ? and [...] in pathnames.
# (A function should also be defined to do full *sh-style environment
# variable expansion.)

def expanduser(path):
    """Expand ~ and ~user constructs.

    The home directory is taken from the environment: HOME, else
    USERPROFILE, else HOMEDRIVE joined with HOMEPATH.  For ~user the
    user name replaces the last component of that directory.

    If user or $HOME is unknown, do nothing."""
    tilde = b'~' if isinstance(path, bytes) else '~'
    if not path.startswith(tilde):
        return path

    # Find the end of the "~" or "~user" token (first separator, or the end).
    seps = _get_bothseps(path)
    end, length = 1, len(path)
    while end < length and path[end] not in seps:
        end += 1

    env = os.environ
    if 'HOME' in env:
        userhome = env['HOME']
    elif 'USERPROFILE' in env:
        userhome = env['USERPROFILE']
    elif 'HOMEPATH' in env:
        userhome = join(env.get('HOMEDRIVE', ''), env['HOMEPATH'])
    else:
        return path

    # Environment values are str; convert to match a bytes path.
    if isinstance(path, bytes):
        userhome = userhome.encode(sys.getfilesystemencoding())

    if end != 1:  # ~user
        userhome = join(dirname(userhome), path[1:end])

    return userhome + path[end:]
# Expand paths containing shell variable substitutions.
# The following rules apply:
#       - no expansion within single quotes
#       - '$$' is translated into '$'
#       - '%%' is translated into '%' if '%%' are not seen in %var1%%var2%
#       - ${varname} is accepted.
#       - $varname is accepted.
#       - %varname% is accepted.
#       - varnames can be made out of letters, digits and the characters '_-'
#         (though is not verified in the ${varname} and %varname% cases)
# XXX With COMMAND.COM you can use any characters in a variable name,
# XXX except '^|<>='.

def expandvars(path):
    """Expand shell variables of the forms $var, ${var} and %var%.

    Accepts a str or bytes path and returns the same type.  Values are
    looked up in os.environ.  Text inside single quotes is copied verbatim,
    including the quotes; an unterminated quote is also copied verbatim
    through to the end of the path.

    Unknown variables are left unchanged."""
    if isinstance(path, bytes):
        if ord('$') not in path and ord('%') not in path:
            return path
        import string
        varchars = bytes(string.ascii_letters + string.digits + '_-', 'ascii')
        quote = b'\''
        percent = b'%'
        brace = b'{'
        dollar = b'$'
    else:
        if '$' not in path and '%' not in path:
            return path
        import string
        varchars = string.ascii_letters + string.digits + '_-'
        quote = '\''
        percent = '%'
        brace = '{'
        dollar = '$'
    res = path[:0]
    index = 0
    pathlen = len(path)
    # Each branch below may replace `path` by its unprocessed remainder and
    # reset `index` relative to that remainder; `res` accumulates the output.
    while index < pathlen:
        c = path[index:index+1]
        if c == quote:   # no expansion within single quotes
            path = path[index + 1:]
            pathlen = len(path)
            try:
                index = path.index(c)
                res = res + c + path[:index + 1]
            except ValueError:
                # No closing quote: keep the opening quote and copy the
                # rest of the path unexpanded.
                res = res + c + path
                index = pathlen - 1
        elif c == percent:  # variable or '%'
            if path[index + 1:index + 2] == percent:
                res = res + c
                index = index + 1
            else:
                path = path[index+1:]
                pathlen = len(path)
                try:
                    index = path.index(percent)
                except ValueError:
                    # No closing '%': copy the remainder verbatim.
                    res = res + percent + path
                    index = pathlen - 1
                else:
                    var = path[:index]
                    if isinstance(path, bytes):
                        var = var.decode('ascii')
                    if var in os.environ:
                        value = os.environ[var]
                    else:
                        value = '%' + var + '%'
                    if isinstance(path, bytes):
                        value = value.encode('ascii')
                    res = res + value
        elif c == dollar:  # variable or '$$'
            if path[index + 1:index + 2] == dollar:
                res = res + c
                index = index + 1
            elif path[index + 1:index + 2] == brace:
                path = path[index+2:]
                pathlen = len(path)
                # NOTE: the ascii decode/encode below runs inside this try;
                # UnicodeError is a ValueError subclass, so a failure there
                # is handled like a missing closing brace.
                try:
                    if isinstance(path, bytes):
                        index = path.index(b'}')
                    else:
                        index = path.index('}')
                    var = path[:index]
                    if isinstance(path, bytes):
                        var = var.decode('ascii')
                    if var in os.environ:
                        value = os.environ[var]
                    else:
                        value = '${' + var + '}'
                    if isinstance(path, bytes):
                        value = value.encode('ascii')
                    res = res + value
                except ValueError:
                    if isinstance(path, bytes):
                        res = res + b'${' + path
                    else:
                        res = res + '${' + path
                    index = pathlen - 1
            else:
                # $varname: consume the longest run of allowed characters.
                var = ''
                index = index + 1
                c = path[index:index + 1]
                while c and c in varchars:
                    if isinstance(path, bytes):
                        var = var + c.decode('ascii')
                    else:
                        var = var + c
                    index = index + 1
                    c = path[index:index + 1]
                if var in os.environ:
                    value = os.environ[var]
                else:
                    value = '$' + var
                if isinstance(path, bytes):
                    value = value.encode('ascii')
                res = res + value
                if c:
                    # Step back so the terminating character is processed
                    # by the next loop iteration.
                    index = index - 1
        else:
            res = res + c
        index = index + 1
    return res
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.
# Previously, this function also truncated pathnames to 8+3 format,
# but as this module is called "ntpath", that's obviously wrong!

def normpath(path):
    """Normalize path, eliminating double slashes, etc.

    Forward slashes become backslashes, empty and '.' components are
    dropped, and 'name\\..' pairs are collapsed.  An empty result becomes
    '.'.  Paths starting with the device prefix \\\\.\\ or the literal-path
    prefix \\\\?\\ are returned unchanged.
    """
    if isinstance(path, bytes):
        special_prefixes = (b'\\\\.\\', b'\\\\?\\')
    else:
        special_prefixes = ('\\\\.\\', '\\\\?\\')
    if path.startswith(special_prefixes):
        # \\.\ names a device and \\?\ a literal path.  Rewriting either
        # one can change what it refers to, so hand it back untouched.
        return path
    sep = _get_sep(path)
    dot = _get_dot(path)
    dotdot = dot * 2
    path = path.replace(_get_altsep(path), sep)
    prefix, path = splitdrive(path)

    # collapse initial backslashes
    if path.startswith(sep):
        prefix = prefix + sep
        path = path.lstrip(sep)

    comps = path.split(sep)
    i = 0
    while i < len(comps):
        if not comps[i] or comps[i] == dot:
            del comps[i]
        elif comps[i] == dotdot:
            if i > 0 and comps[i-1] != dotdot:
                # 'name\..' cancels out; revisit the previous position.
                del comps[i-1:i+1]
                i -= 1
            elif i == 0 and prefix.endswith(sep):
                # '..' directly under a root has nowhere to go; drop it.
                del comps[i]
            else:
                # Leading '..' of a relative path must be kept.
                i += 1
        else:
            i += 1
    # If the path is now empty, substitute '.'
    if not prefix and not comps:
        comps.append(dot)
    return prefix + sep.join(comps)
# Return an absolute path.
try:
    from nt import _getfullpathname

except ImportError:  # not running on Windows - mock up something sensible
    def abspath(path):
        """Return the absolute version of a path.

        A relative path is joined onto the current working directory
        (fetched as bytes for a bytes path) and the result is normalized.
        """
        if isabs(path):
            return normpath(path)
        cwd = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
        return normpath(join(cwd, path))

else:  # use native Windows method on Windows
    def abspath(path):
        """Return the absolute version of a path.

        Uses the native _getfullpathname call; if that call raises
        WindowsError the path is only normalized.
        """
        if not path:
            # Empty path must return current working directory.
            path = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
        else:
            try:
                path = _getfullpathname(path)
            except WindowsError:
                pass  # Bad path - return unchanged.
        return normpath(path)
# realpath is a no-op on systems without islink support
# (islink() in this module always returns False, so there is never a link
# to resolve and abspath gives the same answer).
realpath = abspath
# Win9x family and earlier have no Unicode filename support.
# Element 3 of sys.getwindowsversion() is the platform identifier; per the
# sys module documentation the value 2 denotes the NT family.  The hasattr()
# guard makes this False wherever getwindowsversion is unavailable.
supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
                              sys.getwindowsversion()[3] >= 2)
def relpath(path, start=curdir):
    """Return a relative version of a path.

    Computes the route from *start* (default: the current directory) to
    *path*.  Both are made absolute and normalized first.  Drives and path
    components are compared case-insensitively; the returned components
    keep the spelling used in *path*.

    Raises ValueError if *path* is empty, or if *path* and *start* are on
    different drives or UNC shares.
    """
    sep = _get_sep(path)

    # The default is a str; swap in a dot of the same type as path so that
    # bytes paths work with the default start.
    if start is curdir:
        start = _get_dot(path)

    if not path:
        raise ValueError("no path specified")

    start_abs = abspath(normpath(start))
    path_abs = abspath(normpath(path))
    start_drive, start_rest = splitdrive(start_abs)
    path_drive, path_rest = splitdrive(path_abs)
    # Windows paths are case-insensitive: 'C:' and 'c:' are the same drive.
    if normcase(start_drive) != normcase(path_drive):
        error = "path is on mount '{0}', start on mount '{1}'".format(
            path_drive, start_drive)
        raise ValueError(error)

    start_list = [x for x in start_rest.split(sep) if x]
    path_list = [x for x in path_rest.split(sep) if x]
    # Work out how much of the filepath is shared by start and path.
    i = 0
    for e1, e2 in zip(start_list, path_list):
        if normcase(e1) != normcase(e2):
            break
        i += 1

    if isinstance(path, bytes):
        pardir = b'..'
    else:
        pardir = '..'
    # Climb out of the unshared part of start, then descend into path.
    rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
    if not rel_list:
        return _get_dot(path)
    return join(*rel_list)