2 # wc.py: functions for interacting with a Subversion working copy
4 # Subversion is a tool for revision control.
5 # See http://subversion.tigris.org for more information.
7 # ====================================================================
8 # Licensed to the Apache Software Foundation (ASF) under one
9 # or more contributor license agreements. See the NOTICE file
10 # distributed with this work for additional information
11 # regarding copyright ownership. The ASF licenses this file
12 # to you under the Apache License, Version 2.0 (the
13 # "License"); you may not use this file except in compliance
14 # with the License. You may obtain a copy of the License at
16 # http://www.apache.org/licenses/LICENSE-2.0
18 # Unless required by applicable law or agreed to in writing,
19 # software distributed under the License is distributed on an
20 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 # KIND, either express or implied. See the License for the
22 # specific language governing permissions and limitations
24 ######################################################################
35 # 'status -v' output looks like this:
37 # "%c%c%c%c%c%c%c %c %6s %6s %-12s %s\n"
39 # (Taken from 'print_status' in subversion/svn/status.c.)
41 # Here are the parameters. The middle number or string in parens is the
42 # match.group(), followed by a brief description of the field:
44 # - text status (1) (single letter)
45 # - prop status (1) (single letter)
46 # - wc-lockedness flag (2) (single letter: "L" or " ")
47 # - copied flag (3) (single letter: "+" or " ")
48 # - switched flag (4) (single letter: "S", "X" or " ")
49 # - repos lock status (5) (single letter: "K", "O", "B", "T", " ")
50 # - tree conflict flag (6) (single letter: "C" or " ")
54 # - out-of-date flag (7) (single letter: "*" or " ")
58 # - working revision ('wc_rev') (either digits or "-", "?" or " ")
62 # - last-changed revision (either digits or "?" or " ")
66 # - last author (optional string of non-whitespace
71 # - path ('path') (string of characters until newline)
73 # Working revision, last-changed revision, and last author are whitespace
74 # only if the item is missing.
76 _re_parse_status
= re
.compile('^([?!MACDRUGI_~ ][MACDRUG_ ])'
83 '((?P<wc_rev>\d+|-|\?) +(\d|-|\?)+ +(\S+) +)?'
# A 'Skipped' notification line: group 1 is the single-quoted path that
# ends the line (the trailing newline is required for a match).
_re_parse_skipped = re.compile("^Skipped.* '(.+)'\n")

# An 'svn diff --summarize' line: group 1 is the two-character status
# (first column one of "MAD ", second column "M" or " "), group 2 is the
# path up to the trailing newline.
_re_parse_summarize = re.compile("^([MAD ][M ]) (.+)\n")
90 _re_parse_checkout
= re
.compile('^([RMAGCUDE_ ][MAGCUDE_ ])'
# NOTE: the patterns below are raw strings so that regex escapes such as
# \s, \w and \( reach the re module untouched instead of being treated as
# (invalid) string escape sequences by the Python compiler.

# A 'Restored' or 'Skipped' notification followed by a single-quoted path.
# Group 1 is the verb, group 2 is the path.
_re_parse_co_skipped = re.compile(r"^(Restored|Skipped)\s+'(.+)'")
# Same shape as above, but only 'Restored' notifications are accepted.
_re_parse_co_restored = re.compile(r"^(Restored)\s+'(.+)'")

# Lines typically have a verb followed by whitespace then a path.
# Group 1 is the verb (optionally suffixed with ' (bin)'), group 3 the path.
_re_parse_commit = re.compile(r'^(\w+( \(bin\))?)\s+(.+)')
102 """Describes an existing or expected state of a working copy.
104 The primary metaphor here is a dictionary of paths mapping to instances
105 of StateItem, which describe each item in a working copy.
107 Note: the paths should be *relative* to the root of the working copy,
108 using '/' for the separator (see to_relpath()), and the root of the
109 working copy is identified by the empty path: ''.
112 def __init__(self
, wc_dir
, desc
):
113 "Create a State using the specified description."
114 assert isinstance(desc
, dict)
117 self
.desc
= desc
# dictionary: path -> StateItem
def add(self, more_desc):
    """Merge the entries of the dict MORE_DESC into this State's items.

    An entry whose key is already present replaces the existing value.
    MORE_DESC must be a dict (checked with an assertion).
    """
    assert isinstance(more_desc, dict)

    for key, value in more_desc.items():
        self.desc[key] = value
125 def add_state(self
, parent
, state
):
126 "Import state items from a State object, reparent the items to PARENT."
127 assert isinstance(state
, State
)
129 if parent
and parent
[-1] != '/':
131 for path
, item
in state
.desc
.items():
133 self
.desc
[path
] = item
135 def remove(self
, *paths
):
136 "Remove a path from the state (the path must exist)."
138 del self
.desc
[to_relpath(path
)]
140 def copy(self
, new_root
=None):
141 """Make a deep copy of self. If NEW_ROOT is not None, then set the
142 copy's wc_dir NEW_ROOT instead of to self's wc_dir."""
144 for path
, item
in self
.desc
.items():
145 desc
[path
] = item
.copy()
147 new_root
= self
.wc_dir
148 return State(new_root
, desc
)
150 def tweak(self
, *args
, **kw
):
151 """Tweak the items' values.
153 Each argument in ARGS is the path of a StateItem that already exists in
154 this State. Each keyword argument in KW is a modifiable property of
157 The general form of this method is .tweak([paths...,] key=value...). If
158 one or more paths are provided, then those items' values are
159 modified. If no paths are given, then all items are modified.
164 path_ref
= self
.desc
[to_relpath(path
)]
166 e
.args
= ["Path '%s' not present in WC state descriptor" % path
]
170 for item
in self
.desc
.values():
173 def tweak_some(self
, filter, **kw
):
174 "Tweak the items for which the filter returns true."
175 for path
, item
in self
.desc
.items():
176 if list(filter(path
, item
)):
179 def subtree(self
, subtree_path
):
180 """Return a State object which is a deep copy of the sub-tree
181 identified by SUBTREE_PATH (which is assumed to contain only one
182 element rooted at the tree of this State object's WC_DIR)."""
184 for path
, item
in self
.desc
.items():
185 path_elements
= path
.split("/")
186 if len(path_elements
) > 1 and path_elements
[0] == subtree_path
:
187 desc
["/".join(path_elements
[1:])] = item
.copy()
188 return State(self
.wc_dir
, desc
)
190 def write_to_disk(self
, target_dir
):
191 """Construct a directory structure on disk, matching our state.
193 WARNING: any StateItem that does not have contents (.contents is None)
194 is assumed to be a directory.
196 if not os
.path
.exists(target_dir
):
197 os
.makedirs(target_dir
)
199 for path
, item
in self
.desc
.items():
200 fullpath
= os
.path
.join(target_dir
, path
)
201 if item
.contents
is None:
203 if not os
.path
.exists(fullpath
):
204 os
.makedirs(fullpath
)
208 # ensure its directory exists
209 dirpath
= os
.path
.dirname(fullpath
)
210 if not os
.path
.exists(dirpath
):
213 # write out the file contents now
214 open(fullpath
, 'wb').write(item
.contents
)
217 """Return a "normalized" version of self.
219 A normalized version has the following characteristics:
222 * paths use forward slashes
225 If self is already normalized, then it is returned. Otherwise, a
226 new State is constructed with (shallow) references to self's
229 If the caller needs a fully disjoint State, then use .copy() on
232 if self
.wc_dir
== '':
235 base
= to_relpath(os
.path
.normpath(self
.wc_dir
))
237 desc
= dict([(repos_join(base
, path
), item
)
238 for path
, item
in self
.desc
.items()])
239 return State('', desc
)
241 def compare(self
, other
):
242 """Compare this State against an OTHER State.
244 Three new set objects will be returned: CHANGED, UNIQUE_SELF, and
245 UNIQUE_OTHER. These contain paths of StateItems that are different
246 between SELF and OTHER, paths of items unique to SELF, and paths
247 of item that are unique to OTHER, respectively.
249 assert isinstance(other
, State
)
251 norm_self
= self
.normalize()
252 norm_other
= other
.normalize()
254 # fast-path the easy case
255 if norm_self
== norm_other
:
259 paths_self
= set(norm_self
.desc
.keys())
260 paths_other
= set(norm_other
.desc
.keys())
262 for path
in paths_self
.intersection(paths_other
):
263 if norm_self
.desc
[path
] != norm_other
.desc
[path
]:
266 return changed
, paths_self
- paths_other
, paths_other
- paths_self
268 def compare_and_display(self
, label
, other
):
269 """Compare this State against an OTHER State, and display differences.
271 Information will be written to stdout, displaying any differences
272 between the two states. LABEL will be used in the display. SELF is the
273 "expected" state, and OTHER is the "actual" state.
275 If any changes are detected/displayed, then SVNTreeUnequal is raised.
277 norm_self
= self
.normalize()
278 norm_other
= other
.normalize()
280 changed
, unique_self
, unique_other
= norm_self
.compare(norm_other
)
281 if not changed
and not unique_self
and not unique_other
:
284 # Use the shortest path as a way to find the "root-most" affected node.
285 def _shortest_path(path_set
):
287 for path
in path_set
:
288 if shortest
is None or len(path
) < len(shortest
):
293 path
= _shortest_path(changed
)
294 display_nodes(label
, path
, norm_self
.desc
[path
], norm_other
.desc
[path
])
296 path
= _shortest_path(unique_self
)
297 default_singleton_handler('actual ' + label
, path
, norm_self
.desc
[path
])
299 path
= _shortest_path(unique_other
)
300 default_singleton_handler('expected ' + label
, path
,
301 norm_other
.desc
[path
])
303 raise svntest
.tree
.SVNTreeUnequal
305 def tweak_for_entries_compare(self
):
306 for path
, item
in self
.desc
.copy().items():
308 # If this is an unversioned tree-conflict, remove it.
309 # These are only in their parents' THIS_DIR, they don't have entries.
310 if item
.status
[0] in '!?' and item
.treeconflict
== 'C':
313 # when reading the entry structures, we don't examine for text or
314 # property mods, so clear those flags. we also do not examine the
315 # filesystem, so we cannot detect missing or obstructed files.
316 if item
.status
[0] in 'M!~':
317 item
.status
= ' ' + item
.status
[1]
318 if item
.status
[1] == 'M':
319 item
.status
= item
.status
[0] + ' '
320 # under wc-ng terms, we may report a different revision than the
321 # backwards-compatible code should report. if there is a special
322 # value for compatibility, then use it.
323 if item
.entry_rev
is not None:
324 item
.wc_rev
= item
.entry_rev
325 item
.entry_rev
= None
327 # we don't contact the repository, so our only information is what
328 # is in the working copy. 'K' means we have one and it matches the
329 # repos. 'O' means we don't have one but the repos says the item
330 # is locked by us, elsewhere. 'T' means we have one, and the repos
331 # has one, but it is now owned by somebody else. 'B' means we have
332 # one, but the repos does not.
334 # for each case of "we have one", set the writelocked state to 'K',
335 # and clear it to None for the others. this will match what is
336 # generated when we examine our working copy state.
337 if item
.writelocked
in 'TB':
338 item
.writelocked
= 'K'
339 elif item
.writelocked
== 'O':
340 item
.writelocked
= None
343 "Return an old-style tree (for compatibility purposes)."
345 for path
, item
in self
.desc
.items():
346 nodelist
.append(item
.as_node_tuple(os
.path
.join(self
.wc_dir
, path
)))
348 tree
= svntest
.tree
.build_generic_tree(nodelist
)
350 check
= tree
.as_state()
353 pprint
.pprint(self
.desc
)
354 pprint
.pprint(check
.desc
)
355 # STATE -> TREE -> STATE is lossy.
356 # In many cases, TREE -> STATE -> TREE is not.
357 # Even though our conversion from a TREE has lost some information, we
358 # may be able to verify that our lesser-STATE produces the same TREE.
359 svntest
.tree
.compare_trees('mismatch', tree
, check
.old_tree())
364 return str(self
.old_tree())
366 def __eq__(self
, other
):
367 if not isinstance(other
, State
):
369 norm_self
= self
.normalize()
370 norm_other
= other
.normalize()
371 return norm_self
.desc
== norm_other
.desc
def __ne__(self, other):
    """Inequality test: the exact inverse of self.__eq__(OTHER)."""
    if self.__eq__(other):
        return False
    return True
377 def from_status(cls
, lines
):
378 """Create a State object from 'svn status' output."""
380 def not_space(value
):
381 if value
and value
!= ' ':
387 if line
.startswith('DBG:'):
390 # Quit when we hit an externals status announcement.
391 ### someday we can fix the externals tests to expect the additional
392 ### flood of externals status data.
393 if line
.startswith('Performing'):
396 match
= _re_parse_status
.search(line
)
397 if not match
or match
.group(10) == '-':
398 # ignore non-matching lines, or items that only exist on repos
401 item
= StateItem(status
=match
.group(1),
402 locked
=not_space(match
.group(2)),
403 copied
=not_space(match
.group(3)),
404 switched
=not_space(match
.group(4)),
405 writelocked
=not_space(match
.group(5)),
406 treeconflict
=not_space(match
.group(6)),
407 wc_rev
=not_space(match
.group('wc_rev')),
409 desc
[to_relpath(match
.group('path'))] = item
414 def from_skipped(cls
, lines
):
415 """Create a State object from 'Skipped' lines."""
419 if line
.startswith('DBG:'):
422 match
= _re_parse_skipped
.search(line
)
424 desc
[to_relpath(match
.group(1))] = StateItem()
429 def from_summarize(cls
, lines
):
430 """Create a State object from 'svn diff --summarize' lines."""
434 if line
.startswith('DBG:'):
437 match
= _re_parse_summarize
.search(line
)
439 desc
[to_relpath(match
.group(2))] = StateItem(status
=match
.group(1))
444 def from_checkout(cls
, lines
, include_skipped
=True):
445 """Create a State object from 'svn checkout' lines."""
448 re_extra
= _re_parse_co_skipped
450 re_extra
= _re_parse_co_restored
454 if line
.startswith('DBG:'):
457 match
= _re_parse_checkout
.search(line
)
459 if match
.group(3) == 'C':
463 desc
[to_relpath(match
.group(4))] = StateItem(status
=match
.group(1),
464 treeconflict
=treeconflict
)
466 match
= re_extra
.search(line
)
468 desc
[to_relpath(match
.group(2))] = StateItem(verb
=match
.group(1))
473 def from_commit(cls
, lines
):
474 """Create a State object from 'svn commit' lines."""
478 if line
.startswith('DBG:') or line
.startswith('Transmitting'):
481 match
= _re_parse_commit
.search(line
)
483 desc
[to_relpath(match
.group(3))] = StateItem(verb
=match
.group(1))
488 def from_wc(cls
, base
, load_props
=False, ignore_svn
=True):
489 """Create a State object from a working copy.
491 Walks the tree at PATH, building a State based on the actual files
492 and directories found. If LOAD_PROPS is True, then the properties
493 will be loaded for all nodes (Very Expensive!). If IGNORE_SVN is
494 True, then the .svn subdirectories will be excluded from the State.
497 # we're going to walk the base, and the OS wants "."
501 dot_svn
= svntest
.main
.get_admin_name()
503 for dirpath
, dirs
, files
in os
.walk(base
):
504 parent
= path_to_key(dirpath
, base
)
505 if ignore_svn
and dot_svn
in dirs
:
507 for name
in dirs
+ files
:
508 node
= os
.path
.join(dirpath
, name
)
509 if os
.path
.isfile(node
):
510 contents
= open(node
, 'r').read()
513 desc
[repos_join(parent
, name
)] = StateItem(contents
=contents
)
516 paths
= [os
.path
.join(base
, to_ospath(p
)) for p
in desc
.keys()]
518 all_props
= svntest
.tree
.get_props(paths
)
519 for node
, props
in all_props
.items():
521 desc
['.'] = StateItem(props
=props
)
524 # 'svn proplist' strips './' from the paths. put it back on.
525 node
= os
.path
.join('.', node
)
526 desc
[path_to_key(node
, base
)].props
= props
531 def from_entries(cls
, base
):
532 """Create a State object from a working copy, via the old "entries" API.
534 Walks the tree at PATH, building a State based on the information
535 provided by the old entries API, as accessed via the 'entries-dump'
539 # we're going to walk the base, and the OS wants "."
542 if os
.path
.isfile(base
):
543 # a few tests run status on a single file. quick-and-dirty this. we
544 # really should analyze the entry (similar to below) to be general.
545 dirpath
, basename
= os
.path
.split(base
)
546 entries
= svntest
.main
.run_entriesdump(dirpath
)
548 to_relpath(base
): StateItem
.from_entry(entries
[basename
]),
552 dot_svn
= svntest
.main
.get_admin_name()
554 for dirpath
, dirs
, files
in os
.walk(base
):
556 # don't visit the .svn subdir
559 # this is not a versioned directory. remove all subdirectories since
560 # we don't want to visit them. then skip this directory.
564 entries
= svntest
.main
.run_entriesdump(dirpath
)
568 elif dirpath
.startswith('.' + os
.sep
):
569 parent
= to_relpath(dirpath
[2:])
571 parent
= to_relpath(dirpath
)
573 parent_url
= entries
[''].url
575 for name
, entry
in entries
.items():
576 # if the entry is marked as DELETED *and* it is something other than
577 # schedule-add, then skip it. we can add a new node "over" where a
578 # DELETED node lives.
579 if entry
.deleted
and entry
.schedule
!= 1:
581 if name
and entry
.kind
== 2:
582 # stub subdirectory. leave a "missing" StateItem in here. note
583 # that we can't put the status as "! " because that gets tweaked
584 # out of our expected tree.
585 item
= StateItem(status
=' ', wc_rev
='?')
586 desc
[repos_join(parent
, name
)] = item
588 item
= StateItem
.from_entry(entry
)
590 desc
[repos_join(parent
, name
)] = item
591 implied_url
= repos_join(parent_url
, svn_url_quote(name
))
593 item
._url
= entry
.url
# attach URL to directory StateItems
596 grandpa
, this_name
= repos_split(parent
)
598 implied_url
= repos_join(desc
[grandpa
]._url
,
599 svn_url_quote(this_name
))
603 if implied_url
and implied_url
!= entry
.url
:
606 # only recurse into directories found in this entries. remove any
607 # which are not mentioned.
608 unmentioned
= set(dirs
) - set(entries
.keys())
609 for subdir
in unmentioned
:
616 """Describes an individual item within a working copy.
618 Note that the location of this item is not specified. An external
619 mechanism, such as the State class, will provide location information
623 def __init__(self
, contents
=None, props
=None,
624 status
=None, verb
=None, wc_rev
=None, entry_rev
=None,
625 locked
=None, copied
=None, switched
=None, writelocked
=None,
627 # provide an empty prop dict if it wasn't provided
631 ### keep/make these ints one day?
632 if wc_rev
is not None:
635 # Any attribute can be None if not relevant, unless otherwise stated.
637 # A string of content (if the node is a file).
638 self
.contents
= contents
639 # A dictionary mapping prop name to prop value; never None.
641 # A two-character string from the first two columns of 'svn status'.
643 # The action word such as 'Adding' printed by commands like 'svn update'.
645 # The base revision number of the node in the WC, as a string.
647 # This one will be set when we expect the wc_rev to differ from the one
648 # found in the entries code.
649 self
.entry_rev
= entry_rev
650 # For the following attributes, the value is the status character of that
651 # field from 'svn status', except using value None instead of status ' '.
654 self
.switched
= switched
655 self
.writelocked
= writelocked
656 # Value 'C' or ' ', or None as an expected status meaning 'do not check'.
657 self
.treeconflict
= treeconflict
660 "Make a deep copy of self."
662 vars(new
).update(vars(self
))
663 new
.props
= self
.props
.copy()
666 def tweak(self
, **kw
):
667 for name
, value
in kw
.items():
668 # Refine the revision args (for now) to ensure they are strings.
669 if value
is not None and name
== 'wc_rev':
671 setattr(self
, name
, value
)
673 def __eq__(self
, other
):
674 if not isinstance(other
, StateItem
):
676 v_self
= dict([(k
, v
) for k
, v
in vars(self
).items()
677 if not k
.startswith('_')])
678 v_other
= dict([(k
, v
) for k
, v
in vars(other
).items()
679 if not k
.startswith('_')])
680 if self
.treeconflict
is None:
681 v_other
= v_other
.copy()
682 v_other
['treeconflict'] = None
683 if other
.treeconflict
is None:
684 v_self
= v_self
.copy()
685 v_self
['treeconflict'] = None
686 return v_self
== v_other
def __ne__(self, other):
    """Inequality test: the exact inverse of self.__eq__(OTHER)."""
    if self.__eq__(other):
        return False
    return True
691 def as_node_tuple(self
, path
):
693 if self
.status
is not None:
694 atts
['status'] = self
.status
695 if self
.verb
is not None:
696 atts
['verb'] = self
.verb
697 if self
.wc_rev
is not None:
698 atts
['wc_rev'] = self
.wc_rev
699 if self
.locked
is not None:
700 atts
['locked'] = self
.locked
701 if self
.copied
is not None:
702 atts
['copied'] = self
.copied
703 if self
.switched
is not None:
704 atts
['switched'] = self
.switched
705 if self
.writelocked
is not None:
706 atts
['writelocked'] = self
.writelocked
707 if self
.treeconflict
is not None:
708 atts
['treeconflict'] = self
.treeconflict
710 return (os
.path
.normpath(path
), self
.contents
, self
.props
, atts
)
713 def from_entry(cls
, entry
):
715 if entry
.schedule
== 1: # svn_wc_schedule_add
717 elif entry
.schedule
== 2: # svn_wc_schedule_delete
719 elif entry
.schedule
== 3: # svn_wc_schedule_replace
721 elif entry
.conflict_old
:
722 ### I'm assuming we only need to check one, rather than all conflict_*
725 ### is this sufficient? guessing here w/o investigation.
727 status
= status
[0] + 'C'
738 if entry
.revision
== -1:
741 wc_rev
= entry
.revision
744 ### figure out switched
752 return cls(status
=status
,
757 writelocked
=writelocked
,
762 to_relpath
= to_ospath
= lambda path
: path
def to_relpath(path):
    """Return PATH with every native path separator replaced by '/'."""
    components = path.split(os.sep)
    return '/'.join(components)
768 """Return PATH but with each '/' changed to the native path separator."""
769 return path
.replace('/', os
.sep
)
772 def path_to_key(path
, base
):
773 """Return the relative path that represents the absolute path PATH under
774 the absolute path BASE. PATH must be a path under BASE. The returned
775 path has '/' separators."""
779 if base
.endswith(os
.sep
) or base
.endswith('/') or base
.endswith(':'):
780 # Special path format on Windows:
781 # 'C:/' Is a valid root which includes its separator ('C:/file')
782 # 'C:' is a valid root which isn't followed by a separator ('C:file')
784 # In this case, we don't need a separator between the base and the path.
787 # Account for a separator between the base and the relpath we're creating
790 assert path
.startswith(base
), "'%s' is not a prefix of '%s'" % (base
, path
)
791 return to_relpath(path
[len(base
):])
794 def repos_split(repos_relpath
):
795 """Split a repos path into its directory and basename parts."""
796 idx
= repos_relpath
.rfind('/')
798 return '', repos_relpath
799 return repos_relpath
[:idx
], repos_relpath
[idx
+1:]
802 def repos_join(base
, path
):
803 """Join two repos paths. This generally works for URLs too."""
808 return base
+ '/' + path
def svn_url_quote(url):
    """Return URL percent-encoded, leaving svn's "safe" characters intact.

    Works on both Python 2 (urllib.quote) and Python 3 (urllib.parse.quote);
    the plain urllib.quote attribute does not exist on Python 3.
    """
    # Resolve quote() locally so this function does not depend on which
    # flavour of urllib the enclosing module imported.
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2

    # svn defines a different set of "safe" characters than Python does, so
    # we need to avoid escaping them. see subr/path.c:uri_char_validity[]
    return quote(url, "!$&'()*+,-./:=@_~")
819 def text_base_path(file_path
):
820 """Return the path to the text-base file for the versioned file
822 dot_svn
= svntest
.main
.get_admin_name()
823 return os
.path
.join(os
.path
.dirname(file_path
), dot_svn
, 'text-base',
824 os
.path
.basename(file_path
) + '.svn-base')
828 ### probably toss these at some point. or major rework. or something.
829 ### just bootstrapping some changes for now.
832 def item_to_node(path
, item
):
833 tree
= svntest
.tree
.build_generic_tree([item
.as_node_tuple(path
)])
835 assert len(tree
.children
) == 1
836 tree
= tree
.children
[0]
839 ### yanked from tree.compare_trees()
840 def display_nodes(label
, path
, expected
, actual
):
841 'Display two nodes, expected and actual.'
842 expected
= item_to_node(path
, expected
)
843 actual
= item_to_node(path
, actual
)
844 print("=============================================================")
845 print("Expected '%s' and actual '%s' in %s tree are different!"
846 % (expected
.name
, actual
.name
, label
))
847 print("=============================================================")
848 print("EXPECTED NODE TO BE:")
849 print("=============================================================")
851 print("=============================================================")
852 print("ACTUAL NODE FOUND:")
853 print("=============================================================")
856 ### yanked from tree.py
857 def default_singleton_handler(description
, path
, item
):
858 node
= item_to_node(path
, item
)
859 print("Couldn't find node '%s' in %s tree" % (node
.name
, description
))
861 raise svntest
.tree
.SVNTreeUnequal