#5827: make sure that normpath preserves unicode
[python.git] / Lib / ntpath.py
blob02d8584736dea686276e668f66432c35bfecab87
1 # Module 'ntpath' -- common operations on WinNT/Win95 pathnames
2 """Common pathname manipulations, WindowsNT/95 version.
4 Instead of importing this module directly, import os and refer to this
5 module as os.path.
6 """
8 import os
9 import sys
10 import stat
11 import genericpath
12 import warnings
14 from genericpath import *
# Public names exported by ``from ntpath import *``.
__all__ = [
    "normcase", "isabs", "join", "splitdrive", "split", "splitext",
    "basename", "dirname", "commonprefix", "getsize", "getmtime",
    "getatime", "getctime", "islink", "exists", "lexists", "isdir", "isfile",
    "ismount", "walk", "expanduser", "expandvars", "normpath", "abspath",
    "splitunc", "curdir", "pardir", "sep", "pathsep", "defpath", "altsep",
    "extsep", "devnull", "realpath", "supports_unicode_filenames", "relpath",
]
# String constants describing the pieces of a Windows-style pathname.
curdir = '.'        # name of the current directory
pardir = '..'       # name of the parent directory
extsep = '.'        # separates a file name from its extension
sep = '\\'          # primary path separator
altsep = '/'        # alternative path separator
pathsep = ';'       # separates entries in a search path
devnull = 'nul'     # name of the null device
defpath = '.;C:\\bin'   # default search path
if 'ce' in sys.builtin_module_names:
    defpath = '\\Windows'
elif 'os2' in sys.builtin_module_names:
    # OS/2 w/ VACPP
    altsep = '/'
# Case and separator normalization only; collapsing of '..' and similar
# clean-ups are the job of normpath().

def normcase(s):
    """Normalize case of pathname.

    Every forward slash becomes a backslash and the result is lowercased."""
    with_backslashes = s.replace("/", "\\")
    return with_backslashes.lower()
# Absolute-path test.
# A path is absolute when what follows the optional drive specifier begins
# with a slash or a backslash.

def isabs(s):
    """Test whether a path is absolute"""
    rest = splitdrive(s)[1]
    # An empty remainder has no leading character and is never absolute.
    return rest[:1] in ('/', '\\')
# Join two (or more) paths.

def join(a, *p):
    """Join two or more pathname components, inserting "\\" as needed.
    If any component is an absolute path, all previous path components
    will be discarded."""
    seps = "/\\"
    result = a
    for part in p:
        # First decide whether `part` replaces everything joined so far.
        if result == "":
            discard = True
        elif not isabs(part):
            discard = False
        elif result[1:2] != ":" or part[1:2] == ":":
            # Either the accumulated path has no drive letter, or `part`
            # carries its own drive:
            #     join('c:', 'd:/')  == 'd:/'
            #     join('c:/', 'd:/') == 'd:/'
            discard = True
        else:
            # The accumulated path has a drive letter and `part` is absolute
            # without one.  A bare drive ('c:' or 'c:/') is kept:
            #     join('c:', '/a')   == 'c:/a'
            #     join('c:/', '/a')  == 'c:/a'
            # anything longer is replaced:
            #     join('c:/a', '/b') == '/b'
            discard = len(result) > 3 or (len(result) == 3 and
                                          result[-1] not in seps)

        if discard:
            result = part
            continue

        # Append `part`, making sure exactly one separator sits in between.
        assert len(result) > 0
        last = result[-1]
        if last in seps:
            # Avoid doubling the separator.
            result += part[1:] if part and part[0] in seps else part
        elif last == ":" or (part and part[0] in seps):
            result += part
        elif part:
            result += "\\" + part
        else:
            # Non-empty path without a trailing separator, joined with an
            # empty component: since e.g. split('a/') produces ('a', ''),
            # add a backslash here.
            result += '\\'

    return result
# Separate the drive specification (a letter followed by a colon) from the
# rest of the path.  drivespec + pathspec == p always holds.
def splitdrive(p):
    """Split a pathname into drive and path specifiers. Returns a 2-tuple
"(drive,path)";  either part may be empty"""
    has_drive = p[1:2] == ':'
    if not has_drive:
        return '', p
    return p[:2], p[2:]
# Parse UNC paths
def splitunc(p):
    """Split a pathname into UNC mount point and relative path specifiers.

    Return a 2-tuple (unc, rest); either part may be empty.
    If unc is not empty, it has the form '//host/mount' (or similar
    using backslashes).  unc+rest is always the input path.
    Paths containing drive letters never have an UNC part.
    """
    if p[1:2] == ':':
        return '', p  # Drive letter present
    if p[0:2] not in ('//', '\\\\'):
        return '', p  # Does not start with a doubled separator
    # This is a UNC path:
    #   \\machine\mountpoint\directories...
    #   ^^^^^^^^^^^^^^^^^^^^ plays the role of the drive letter
    #                       ^^^^^^^^^^^^^^^ the directory part
    # Only separator positions matter, so a slash-normalised copy is enough
    # to search in; the slices are always taken from the original string.
    normp = p.replace('/', '\\')
    host_end = normp.find('\\', 2)
    if host_end == -1:
        # No separator after the host name: not treated as a UNC path.
        return ("", p)
    mount_end = normp.find('\\', host_end + 1)
    if mount_end == -1:
        # Nothing follows the mount point.
        mount_end = len(p)
    return p[:mount_end], p[mount_end:]
# Split a path into head (everything up to the last separator) and tail
# (the rest).  Once the trailing separator is stripped, join(head, tail) == p.
# The head only ends in a separator when it is the root.

def split(p):
    """Split a pathname.

    Return tuple (head, tail) where tail is everything after the final slash.
    Either part may be empty."""
    drive, rest = splitdrive(p)
    # Position just past the last slash or backslash (0 when there is none).
    cut = max(rest.rfind('/'), rest.rfind('\\')) + 1
    head, tail = rest[:cut], rest[cut:]  # tail contains no separators
    # Drop trailing separators from head, unless head is nothing but separators.
    trimmed = head.rstrip('/\\')
    return drive + (trimmed or head), tail
# Split a path into root and extension.
# The extension starts at the last dot of the last pathname component and
# the root is everything before it, so root + ext == p always holds.

def splitext(p):
    root_and_ext = genericpath._splitext(p, sep, altsep, extsep)
    return root_and_ext
# Reuse the documentation of the shared implementation.
splitext.__doc__ = genericpath._splitext.__doc__
# The tail (basename) of a path is the second element returned by split().

def basename(p):
    """Returns the final component of a pathname"""
    _head, tail = split(p)
    return tail
# The head (dirname) of a path is the first element returned by split().

def dirname(p):
    """Returns the directory component of a pathname"""
    head, _tail = split(p)
    return head
# Symbolic link test.
# This implementation never reports a link.

def islink(path):
    """Test for symbolic link.

    On WindowsNT/95 and OS/2 always returns false
    """
    return False

# lexists is simply another name for exists
lexists = exists
# Mount point test: either a root (with or without drive letter), or a UNC
# mount point followed by at most a single / or \.

def ismount(path):
    """Test whether a path is a mount point (defined as root of drive)"""
    unc, remainder = splitunc(path)
    if not unc:
        # No UNC part: the path after the drive must be a lone separator.
        tail = splitdrive(path)[1]
        return tail in ("/", "\\")
    return remainder in ("", "/", "\\")
# Directory tree walk.
# func(arg, dirname, filenames) is called for top and for every directory
# found below it; filenames is the listing of that directory.  func may edit
# the list in place to filter the entries or to change the visiting order.

def walk(top, func, arg):
    """Directory tree walk with callback function.

    Call func(arg, dirname, fnames) for top and then, recursively, for every
    directory found beneath it.  dirname is the directory being visited and
    fnames is the list returned by os.listdir() for it.  func may change
    fnames in place (for instance with del or slice assignment); only the
    names still present afterwards are considered for recursion, which
    allows filtering or reordering the traversal.  arg is handed to func
    untouched on every call and is otherwise not interpreted; it may carry
    a filename pattern or a mutable accumulator, and None is a common
    choice.  A directory that cannot be listed is silently skipped."""
    warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
                      stacklevel=2)
    try:
        names = os.listdir(top)
    except os.error:
        return
    func(arg, top, names)
    # Iterate the very list handed to func so that its edits take effect.
    for entry in names:
        child = join(top, entry)
        if not isdir(child):
            continue
        walk(child, func, arg)
# Expand paths beginning with '~' or '~user'.
# '~' stands for the home directory taken from the environment; '~user'
# replaces the last component of that directory with 'user'.
# A path that does not start with '~', or for which no home directory can be
# determined, is returned unchanged (error reporting is left to whatever
# function receives the expanded path).
# See also module 'glob' for expansion of *, ? and [...] in pathnames.
# (A function should also be defined to do full *sh-style environment
# variable expansion.)

def expanduser(path):
    """Expand ~ and ~user constructs.

    If user or $HOME is unknown, do nothing."""
    if path[:1] != '~':
        return path
    # Find the end of the '~' or '~user' prefix.
    end, length = 1, len(path)
    while end < length and path[end] not in '/\\':
        end += 1

    env = os.environ
    if 'HOME' in env:
        userhome = env['HOME']
    elif 'USERPROFILE' in env:
        userhome = env['USERPROFILE']
    elif 'HOMEPATH' in env:
        # HOMEDRIVE is optional; HOMEPATH alone is used when it is missing.
        userhome = join(env.get('HOMEDRIVE', ''), env['HOMEPATH'])
    else:
        return path

    if end != 1:  # ~user
        userhome = join(dirname(userhome), path[1:end])

    return userhome + path[end:]
# Expand paths containing shell variable substitutions.
# The following rules apply:
#       - no expansion within single quotes
#       - '$$' is translated into '$'
#       - '%%' is translated into '%' if '%%' are not seen in %var1%%var2%
#       - ${varname} is accepted.
#       - $varname is accepted.
#       - %varname% is accepted.
#       - varnames can be made out of letters, digits and the characters '_-'
#         (though is not verified in the ${varname} and %varname% cases)
# XXX With COMMAND.COM you can use any characters in a variable name,
# XXX except '^|<>='.

def expandvars(path):
    """Expand shell variables of the forms $var, ${var} and %var%.

    Text enclosed in single quotes is copied verbatim, '$$' yields '$' and
    '%%' yields '%'.  Unknown variables are left unchanged, and so are
    constructs that are never closed (a lone quote, '%name' or '${name').
    Values are looked up in os.environ."""
    if '$' not in path and '%' not in path:
        return path
    import string
    varchars = string.ascii_letters + string.digits + '_-'
    res = ''
    index = 0
    pathlen = len(path)
    # Whenever a delimited construct is met, `path` is re-sliced to start
    # right after the opening delimiter, so `index` is always relative to
    # the current (shrinking) `path`.
    while index < pathlen:
        c = path[index]
        if c == '\'':   # no expansion within single quotes
            path = path[index + 1:]
            pathlen = len(path)
            try:
                index = path.index('\'')
                res += '\'' + path[:index + 1]
            except ValueError:
                # Unterminated quote: keep the opening quote and copy the
                # rest of the string unchanged.
                res += '\'' + path
                index = pathlen - 1
        elif c == '%':  # variable or '%'
            if path[index + 1:index + 2] == '%':
                res += c
                index += 1
            else:
                path = path[index + 1:]
                pathlen = len(path)
                try:
                    index = path.index('%')
                except ValueError:
                    # No closing '%': copy the remainder literally.
                    res += '%' + path
                    index = pathlen - 1
                else:
                    var = path[:index]
                    if var in os.environ:
                        res += os.environ[var]
                    else:
                        res += '%' + var + '%'
        elif c == '$':  # variable or '$$'
            if path[index + 1:index + 2] == '$':
                res += c
                index += 1
            elif path[index + 1:index + 2] == '{':
                path = path[index + 2:]
                pathlen = len(path)
                try:
                    index = path.index('}')
                    var = path[:index]
                    if var in os.environ:
                        res += os.environ[var]
                    else:
                        res += '${' + var + '}'
                except ValueError:
                    # No closing '}': copy the remainder literally.
                    res += '${' + path
                    index = pathlen - 1
            else:
                # Bare $name: consume the longest run of name characters.
                var = ''
                index += 1
                c = path[index:index + 1]
                while c != '' and c in varchars:
                    var += c
                    index += 1
                    c = path[index:index + 1]
                if var in os.environ:
                    res += os.environ[var]
                else:
                    res += '$' + var
                if c != '':
                    # Step back so the character that ended the name is
                    # processed by the next iteration.
                    index -= 1
        else:
            res += c
        index += 1
    return res
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.
# Previously, this function also truncated pathnames to 8+3 format,
# but as this module is called "ntpath", that's obviously wrong!

def normpath(path):
    """Normalize path, eliminating double slashes, etc."""
    # Use literals of the input's string type so unicode stays unicode.
    if isinstance(path, unicode):
        backslash, dot = u'\\', u'.'
    else:
        backslash, dot = '\\', '.'
    path = path.replace("/", "\\")
    prefix, path = splitdrive(path)
    # Care is needed here.  Without a drive letter, a path starting with a
    # backslash is either absolute on the current drive (\dir1\dir2\file) or
    # a UNC name (\\server\mount\dir1\file), so leading backslashes must not
    # be collapsed blindly.  All of them are kept: the invalid name \\\a\b
    # comes back unchanged while a\\\b becomes a\b.  It is not clear that a
    # better behaviour exists for such edge cases.
    if prefix == '':
        # No drive letter - move every leading backslash into the prefix.
        while path[:1] == "\\":
            prefix += backslash
            path = path[1:]
    elif path.startswith("\\"):
        # Drive letter present - collapse leading backslashes into one.
        prefix += backslash
        path = path.lstrip("\\")
    kept = []
    for comp in path.split("\\"):
        if comp in ('.', ''):
            continue
        if comp != '..':
            kept.append(comp)
        elif kept and kept[-1] != '..':
            # '..' cancels the previous real component.
            kept.pop()
        elif not kept and prefix.endswith("\\"):
            # '..' directly under the root is dropped.
            pass
        else:
            kept.append(comp)
    # If the path is now empty, substitute '.'
    if not prefix and not kept:
        kept.append(dot)
    return prefix + backslash.join(kept)
# Return an absolute path.
try:
    from nt import _getfullpathname

except ImportError:  # not running on Windows - mock up something sensible
    def abspath(path):
        """Return the absolute version of a path.

        A relative path is joined onto the current working directory and
        the result is normalized with normpath()."""
        if not isabs(path):
            # Fetch the working directory in the same string type as `path`,
            # so a unicode path is never mixed with a byte-string directory.
            if isinstance(path, unicode):
                cwd = os.getcwdu()
            else:
                cwd = os.getcwd()
            path = join(cwd, path)
        return normpath(path)

else:  # use native Windows method on Windows
    def abspath(path):
        """Return the absolute version of a path.

        An empty path yields the current working directory; a path rejected
        by the native call is normalized and returned as given."""
        if path:  # Empty path must return current working directory.
            try:
                path = _getfullpathname(path)
            except WindowsError:
                pass  # Bad path - return unchanged.
        elif isinstance(path, unicode):
            # Keep the result unicode for an empty unicode path.
            path = os.getcwdu()
        else:
            path = os.getcwd()
        return normpath(path)
# realpath is a no-op on systems without islink support
realpath = abspath
# Win9x family and earlier have no Unicode filename support.
if hasattr(sys, "getwindowsversion"):
    supports_unicode_filenames = sys.getwindowsversion()[3] >= 2
else:
    supports_unicode_filenames = False
def _abspath_split(path):
    """Return (is_unc, prefix, components) for the absolute form of path."""
    absolute = abspath(path)
    prefix, rest = splitunc(absolute)
    is_unc = bool(prefix)
    if not is_unc:
        prefix, rest = splitdrive(absolute)
    # Drop empty components so a bare root ('c:\\') contributes none.
    return is_unc, prefix, [part for part in rest.split(sep) if part]


def relpath(path, start=curdir):
    """Return a relative version of a path

    path is expressed relative to start (the current directory by default).
    Raises ValueError when path is empty, when exactly one of the two is a
    UNC path, or when they are on different drives or UNC roots."""

    if not path:
        raise ValueError("no path specified")

    start_is_unc, start_prefix, start_list = _abspath_split(start)
    path_is_unc, path_prefix, path_list = _abspath_split(path)

    if path_is_unc ^ start_is_unc:
        raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)"
                         % (path, start))
    if path_prefix.lower() != start_prefix.lower():
        if path_is_unc:
            raise ValueError("path is on UNC root %s, start on UNC root %s"
                             % (path_prefix, start_prefix))
        else:
            raise ValueError("path is on drive %s, start on drive %s"
                             % (path_prefix, start_prefix))

    # Work out how much of the filepath is shared by start and path
    # (components are compared case-insensitively).
    shared = 0
    for start_part, path_part in zip(start_list, path_list):
        if start_part.lower() != path_part.lower():
            break
        shared += 1

    # Climb out of what is left of start, then descend into the rest of path.
    rel_list = [pardir] * (len(start_list) - shared) + path_list[shared:]
    if not rel_list:
        return curdir
    return join(*rel_list)