Merged revisions 82952,82954 via svnmerge from
[python/dscho.git] / Lib / posixpath.py
blobaace2b203d31f50fe88c65795b47fcb410a0c250
1 """Common operations on Posix pathnames.
3 Instead of importing this module directly, import os and refer to
4 this module as os.path. The "os.path" name is an alias for this
5 module on Posix systems; on other systems (e.g. Mac, Windows),
6 os.path provides the same operations in a manner specific to that
7 platform, and is an alias to another module (e.g. macpath, ntpath).
9 Some of this can actually be useful on non-Posix systems too, e.g.
10 for manipulation of the pathname component of URLs.
11 """
13 import os
14 import sys
15 import stat
16 import genericpath
17 from genericpath import *
# Public names exported by "from posixpath import *" (and re-exported
# through os.path on Posix systems).
__all__ = [
    "normcase", "isabs", "join", "splitdrive", "split", "splitext",
    "basename", "dirname", "commonprefix", "getsize", "getmtime",
    "getatime", "getctime", "islink", "exists", "lexists", "isdir", "isfile",
    "ismount", "expanduser", "expandvars", "normpath", "abspath",
    "samefile", "sameopenfile", "samestat",
    "curdir", "pardir", "sep", "pathsep", "defpath", "altsep", "extsep",
    "devnull", "realpath", "supports_unicode_filenames", "relpath",
]

# Strings representing various path-related bits and pieces.
# These are primarily for export; the functions in this module hardcode
# their own str/bytes equivalents instead of reading these names.
curdir = '.'                # name of the current directory
pardir = '..'               # name of the parent directory
extsep = '.'                # separates a base name from its extension
sep = '/'                   # separates pathname components
pathsep = ':'               # separates entries in a search path
defpath = ':/bin:/usr/bin'  # default search path for executables
altsep = None               # no alternative component separator
devnull = '/dev/null'       # path of the null device
def _get_sep(path):
    """Return the path separator of the same type (bytes or str) as path."""
    return b'/' if isinstance(path, bytes) else '/'
44 # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
45 # On MS-DOS this may also turn slashes into backslashes; however, other
46 # normalizations (such as optimizing '../' away) are not allowed
47 # (another function should be defined to do that).
def normcase(s):
    """Normalize the case of a pathname.

    Posix file names are case-sensitive, so the argument is returned
    unchanged."""
    # TODO: on Mac OS X, this should really return s.lower().
    return s
55 # Return whether a path is absolute.
56 # Trivial in Posix, harder on the Mac or MS-DOS.
def isabs(s):
    """Return True if the path s is absolute, i.e. begins with a slash.

    Works for both str and bytes paths."""
    return s.startswith(_get_sep(s))
64 # Join pathnames.
65 # Ignore the previous parts if a part is absolute.
66 # Insert a '/' unless the first part is empty or already ends in '/'.
def join(a, *p):
    """Join two or more pathname components, inserting '/' as needed.

    An absolute component discards everything joined before it.  No
    separator is inserted after an empty result or after a result that
    already ends with one."""
    sep = _get_sep(a)
    result = a
    for part in p:
        if part.startswith(sep):
            # An absolute component restarts the path from scratch.
            result = part
            continue
        if result and not result.endswith(sep):
            result += sep
        result += part
    return result
84 # Split a path in head (everything up to the last '/') and tail (the
85 # rest). If the path ends in '/', tail will be empty. If there is no
86 # '/' in the path, head will be empty.
87 # Trailing '/'es are stripped from head unless it is the root.
def split(p):
    """Split a pathname into a (head, tail) tuple.

    tail is everything after the final slash and head is everything
    before it; either part may be empty.  Trailing slashes are removed
    from head unless head consists only of slashes (the root)."""
    sep = _get_sep(p)
    cut = p.rfind(sep) + 1
    head = p[:cut]
    tail = p[cut:]
    # rstrip() yields an empty value exactly when head is empty or made
    # up solely of separators; in both cases head is kept as it is.
    head = head.rstrip(sep) or head
    return head, tail
100 # Split a path in root and extension.
101 # The extension is everything starting at the last dot in the last
102 # pathname component; the root is everything before that.
103 # It is always true that root + ext == p.
def splitext(p):
    # The docstring is copied from genericpath._splitext below.
    # Select separator and extension marker matching the argument type.
    as_bytes = isinstance(p, bytes)
    slash = b'/' if as_bytes else '/'
    dot = b'.' if as_bytes else '.'
    # Posix has no alternative separator, hence None.
    return genericpath._splitext(p, slash, None, dot)
splitext.__doc__ = genericpath._splitext.__doc__
115 # Split a pathname into a drive specification and the rest of the
116 # path. Useful on DOS/Windows/NT; on Unix, the drive is always empty.
def splitdrive(p):
    """Split a pathname into a (drive, path) tuple.

    Posix has no drive specifications, so drive is always an empty value
    of the same type as p and path is p itself."""
    no_drive = p[:0]
    return no_drive, p
124 # Return the tail (basename) part of a path, same as split(path)[1].
def basename(p):
    """Return the final component of a pathname.

    This is everything after the last slash; it is empty when the path
    ends with a slash."""
    # rpartition() returns the whole value as its last item when no
    # separator is present.
    return p.rpartition(_get_sep(p))[2]
133 # Return the head (dirname) part of a path, same as split(path)[0].
def dirname(p):
    """Return the directory component of a pathname.

    This is everything up to the last slash, with trailing slashes
    removed unless the result consists only of slashes (the root)."""
    sep = _get_sep(p)
    head = p[:p.rfind(sep) + 1]
    # Keep head untouched when it is empty or nothing but separators.
    return head.rstrip(sep) or head
145 # Is a path a symbolic link?
146 # This will always return false on systems where os.lstat doesn't exist.
def islink(path):
    """Return True if path refers to a symbolic link.

    Returns False when the path cannot be lstat()ed or when os.lstat is
    not available on this platform."""
    try:
        info = os.lstat(path)
    except (os.error, AttributeError):
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
156 # Being true for dangling symbolic links is also useful.
def lexists(path):
    """Return True if path exists, without following symbolic links.

    Consequently a broken symbolic link counts as existing."""
    try:
        os.lstat(path)
    except os.error:
        return False
    else:
        return True
167 # Are two filenames really pointing to the same file?
def samefile(f1, f2):
    """Return True if both pathnames refer to the same actual file.

    Both paths are stat()ed (symbolic links are followed); an os.error
    from either call propagates to the caller."""
    return samestat(os.stat(f1), os.stat(f2))
176 # Are two open files really referencing the same file?
177 # (Not necessarily the same file descriptor!)
def sameopenfile(fp1, fp2):
    """Return True if two open file descriptors refer to the same file.

    The descriptors themselves need not be equal; both are passed to
    os.fstat() and the results are compared with samestat()."""
    return samestat(os.fstat(fp1), os.fstat(fp2))
186 # Are two stat buffers (obtained from stat, fstat or lstat)
187 # describing the same file?
def samestat(s1, s2):
    """Return True if two stat buffers describe the same file.

    Two buffers match when both their i-node number and their device
    number are equal."""
    return (s1.st_ino == s2.st_ino and
            s1.st_dev == s2.st_dev)
195 # Is a path a mount point?
196 # (Does this work for all UNIXes? Is it even guaranteed to work by Posix?)
def ismount(path):
    """Test whether a path is a mount point.

    A path is considered a mount point when its parent directory
    (path/..) lives on a different device, or when path and path/.. are
    the same i-node (which is the case for the root directory).

    Returns False if path or its parent cannot be lstat()ed, and for
    symbolic links."""
    try:
        s1 = os.lstat(path)
    except os.error:
        return False  # It doesn't exist -- so not a mount point :-)
    # A symlink can never be a mount point.  Without this check, s1
    # would describe the link itself while path/.. is resolved through
    # the link's target, so a link to a directory on another device
    # would be misreported as a mount point.
    if stat.S_ISLNK(s1.st_mode):
        return False
    if isinstance(path, bytes):
        parent = join(path, b'..')
    else:
        parent = join(path, '..')
    try:
        s2 = os.lstat(parent)
    except os.error:
        return False
    if s1.st_dev != s2.st_dev:
        return True  # path/.. is on a different device than path
    # path/.. being the same i-node as path means path is a root.
    return s1.st_ino == s2.st_ino
220 # Expand paths beginning with '~' or '~user'.
221 # '~' means $HOME; '~user' means that user's home directory.
222 # If the path doesn't begin with '~', or if the user or $HOME is unknown,
223 # the path is returned unchanged (leaving error reporting to whatever
224 # function is called with the expanded path as argument).
225 # See also module 'glob' for expansion of *, ? and [...] in pathnames.
226 # (A function should also be defined to do full *sh-style environment
227 # variable expansion.)
def expanduser(path):
    """Expand ~ and ~user constructions.  If user or $HOME is unknown,
    do nothing.

    A leading '~' is replaced by $HOME, or by the current user's home
    directory from the password database when $HOME is not set.  A
    leading '~user' is replaced by that user's home directory.  Paths
    that do not start with '~' are returned unchanged.  The result has
    the same type (str or bytes) as path."""
    if isinstance(path, bytes):
        tilde = b'~'
        root = b'/'
    else:
        tilde = '~'
        root = '/'
    if not path.startswith(tilde):
        return path
    # i marks the end of the '~' or '~user' prefix.
    i = path.find(root, 1)
    if i < 0:
        i = len(path)
    if i == 1:
        if 'HOME' in os.environ:
            userhome = os.environ['HOME']
        else:
            import pwd
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # The current uid has no password database entry.
                return path
    else:
        import pwd
        name = path[1:i]
        if isinstance(name, bytes):
            # Decode the same way the home directory is encoded below, so
            # that non-ASCII user names do not raise UnicodeDecodeError.
            name = name.decode(sys.getfilesystemencoding(), 'surrogateescape')
        try:
            pwent = pwd.getpwnam(name)
        except KeyError:
            return path
        userhome = pwent.pw_dir
    if isinstance(path, bytes):
        userhome = userhome.encode(sys.getfilesystemencoding(),
                                   'surrogateescape')
    # Strip trailing slashes so that a home directory of '/' does not
    # produce '//rest'; fall back to the root if nothing is left at all.
    userhome = userhome.rstrip(root)
    return (userhome + path[i:]) or root
267 # Expand paths containing shell variable substitutions.
268 # This expands the forms $variable and ${variable} only.
269 # Non-existent variables are left unchanged.
# Lazily compiled patterns for str and bytes paths respectively.
_varprog = None
_varprogb = None

def expandvars(path):
    """Expand shell variables of form $var and ${var}.  Unknown variables
    are left unchanged.

    Values are looked up in os.environ.  For bytes paths, variable names
    and values are converted using the filesystem encoding with the
    'surrogateescape' error handler, so non-ASCII names and values do not
    raise.  The result has the same type (str or bytes) as path."""
    global _varprog, _varprogb
    if isinstance(path, bytes):
        if b'$' not in path:
            return path
        if not _varprogb:
            import re
            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
        search = _varprogb.search
        start = b'{'
        end = b'}'
    else:
        if '$' not in path:
            return path
        if not _varprog:
            import re
            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
        search = _varprog.search
        start = '{'
        end = '}'
    encoding = sys.getfilesystemencoding()
    i = 0
    while True:
        m = search(path, i)
        if not m:
            break
        i, j = m.span(0)
        name = m.group(1)
        if name.startswith(start) and name.endswith(end):
            name = name[1:-1]
        if isinstance(name, bytes):
            # The braces form accepts arbitrary bytes, so a strict ASCII
            # decode could raise instead of leaving the text unchanged.
            name = name.decode(encoding, 'surrogateescape')
        if name in os.environ:
            tail = path[j:]
            value = os.environ[name]
            if isinstance(path, bytes):
                value = value.encode(encoding, 'surrogateescape')
            path = path[:i] + value
            # Resume after the substituted value: it is not re-expanded.
            i = len(path)
            path += tail
        else:
            i = j
    return path
320 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
321 # It should be understood that this may change the meaning of the path
322 # if it contains symbolic links!
def normpath(path):
    """Normalize path, eliminating double slashes, etc.

    Empty and '.' components are dropped and 'name/..' pairs are
    collapsed.  An empty path, or one that normalizes to nothing,
    becomes '.'.  Note that collapsing '..' may change the meaning of a
    path that contains symbolic links."""
    if isinstance(path, bytes):
        sep, empty, dot, dotdot = b'/', b'', b'.', b'..'
    else:
        sep, empty, dot, dotdot = '/', '', '.', '..'
    if path == empty:
        return dot
    # POSIX allows one or two initial slashes, but treats three or more
    # as single slash.
    if not path.startswith(sep):
        leading = 0
    elif path.startswith(sep * 2) and not path.startswith(sep * 3):
        leading = 2
    else:
        leading = 1
    kept = []
    for part in path.split(sep):
        if part == empty or part == dot:
            continue
        if part != dotdot:
            kept.append(part)
        elif kept and kept[-1] != dotdot:
            # 'name/..' cancels out.
            kept.pop()
        elif not kept and leading:
            # '..' directly below the root stays at the root.
            pass
        else:
            # Leading '..' of a relative path must be preserved.
            kept.append(part)
    normalized = sep * leading + sep.join(kept)
    return normalized or dot
def abspath(path):
    """Return a normalized, absolute version of path.

    A relative path is joined onto the current working directory (taken
    as bytes when path is bytes) before being normalized."""
    if isabs(path):
        return normpath(path)
    cwd = os.getcwdb() if isinstance(path, bytes) else os.getcwd()
    return normpath(join(cwd, path))
372 # Return a canonical path (i.e. the absolute location of a file on the
373 # filesystem).
def realpath(filename):
    """Return the canonical path of the specified filename, eliminating any
    symbolic links encountered in the path.

    The path is examined one leading prefix at a time.  At the first
    prefix that is a symbolic link, the link is resolved and the whole
    process restarts on the resolved prefix joined with the remaining
    components.  If resolving runs into a symlink loop, the unresolved
    path is returned in absolute form."""
    if isinstance(filename, bytes):
        sep, empty = b'/', b''
    else:
        sep, empty = '/', ''
    if isabs(filename):
        bits = [sep] + filename.split(sep)[1:]
    else:
        bits = [empty] + filename.split(sep)

    for end in range(2, len(bits) + 1):
        component = join(*bits[:end])
        if not islink(component):
            continue
        # Resolve symbolic links.
        remainder = bits[end:]
        resolved = _resolve_link(component)
        if resolved is None:
            # Infinite loop -- return original component + rest of the path
            return abspath(join(component, *remainder))
        return realpath(join(resolved, *remainder))

    return abspath(filename)
def _resolve_link(path):
    """Internal helper function.  Takes a path and follows symlinks
    until we either arrive at something that isn't a symlink, or
    encounter a path we've seen before (meaning that there's a loop).

    Returns the final, normalized path, or None if a loop was detected.
    A path that is not a symlink to begin with is returned unchanged.
    """
    paths_seen = set()
    while islink(path):
        if path in paths_seen:
            # Already seen this path, so we must have a symlink loop
            return None
        paths_seen.add(path)
        # Resolve where the link points to
        resolved = os.readlink(path)
        if not isabs(resolved):
            # A relative target is interpreted relative to the directory
            # that contains the link.
            link_dir = dirname(path)
            path = normpath(join(link_dir, resolved))
        else:
            path = normpath(resolved)
    return path
# True if arbitrary Unicode strings can be used as file names.  Among the
# platforms served by this module that is only guaranteed on Mac OS X;
# elsewhere it depends on the locale, so False is reported.
supports_unicode_filenames = (sys.platform == 'darwin')
def relpath(path, start=None):
    """Return a relative version of a path.

    The result is the path to `path` as seen from the directory `start`
    (the current directory by default).  Both are made absolute first;
    the file system is not consulted and symbolic links are not resolved.
    The result has the same type (str or bytes) as path.

    Raises ValueError if path is empty."""

    if not path:
        raise ValueError("no path specified")

    if isinstance(path, bytes):
        curdir = b'.'
        sep = b'/'
        pardir = b'..'
    else:
        curdir = '.'
        sep = '/'
        pardir = '..'

    if start is None:
        start = curdir

    # Drop the empty strings produced by splitting the leading slash (and
    # by splitting a bare '/').  Otherwise the root directory would count
    # as an extra component and relpath('/a', '/') would yield '../a'.
    start_list = [x for x in abspath(start).split(sep) if x]
    path_list = [x for x in abspath(path).split(sep) if x]

    # Work out how much of the filepath is shared by start and path.
    i = len(commonprefix([start_list, path_list]))

    # Climb out of what is left of start, then descend into path.
    rel_list = [pardir] * (len(start_list) - i) + path_list[i:]
    if not rel_list:
        return curdir
    return join(*rel_list)