1 """Git commands and queries for Git"""
2 from __future__
import absolute_import
, division
, print_function
, unicode_literals
6 from io
import StringIO
11 from .git
import STDOUT
12 from .git
import EMPTY_TREE_OID
13 from .git
import OID_LENGTH
15 from .interaction
import Interaction
18 class InvalidRepositoryError(Exception):
22 def add(context
, items
, u
=False):
23 """Run "git add" while preventing argument overflow"""
25 return utils
.slice_fn(
26 items
, lambda paths
: fn('--', force
=True, verbose
=True, u
=u
, *paths
)
30 def apply_diff(context
, filename
):
32 return git
.apply(filename
, index
=True, cached
=True)
35 def apply_diff_to_worktree(context
, filename
):
37 return git
.apply(filename
)
40 def get_branch(context
, branch
):
42 branch
= current_branch(context
)
46 def upstream_remote(context
, branch
=None):
47 """Return the remote associated with the specified branch"""
49 branch
= get_branch(context
, branch
)
50 return config
.get('branch.%s.remote' % branch
)
53 def remote_url(context
, remote
, push
=False):
54 """Return the URL for the specified remote"""
56 url
= config
.get('remote.%s.url' % remote
, '')
58 url
= config
.get('remote.%s.pushurl' % remote
, url
)
62 def diff_index_filenames(context
, ref
):
64 Return a diff of filenames that have been modified relative to the index
67 out
= git
.diff_index(ref
, name_only
=True, z
=True, _readonly
=True)[STDOUT
]
68 return _parse_diff_filenames(out
)
71 def diff_filenames(context
, *args
):
72 """Return a list of filenames that have been modified"""
75 name_only
=True, no_commit_id
=True, r
=True, z
=True, _readonly
=True, *args
77 return _parse_diff_filenames(out
)
80 def listdir(context
, dirname
, ref
='HEAD'):
81 """Get the contents of a directory according to Git
83 Query Git for the content of a directory, taking ignored
90 # first, parse git ls-tree to get the tracked files
91 # in a list of (type, path) tuples
92 entries
= ls_tree(context
, dirname
, ref
=ref
)
94 if entry
[0][0] == 't': # tree
97 files
.append(entry
[1])
99 # gather untracked files
100 untracked
= untracked_files(context
, paths
=[dirname
], directory
=True)
101 for path
in untracked
:
102 if path
.endswith('/'):
103 dirs
.append(path
[:-1])
113 def diff(context
, args
):
114 """Return a list of filenames for the given diff arguments
116 :param args: list of arguments to pass to "git diff --name-only"
120 out
= git
.diff(name_only
=True, z
=True, _readonly
=True, *args
)[STDOUT
]
121 return _parse_diff_filenames(out
)
124 def _parse_diff_filenames(out
):
126 return out
[:-1].split('\0')
130 def tracked_files(context
, *args
):
131 """Return the names of all files in the repository"""
133 out
= git
.ls_files('--', *args
, z
=True, _readonly
=True)[STDOUT
]
135 return sorted(out
[:-1].split('\0'))
139 def all_files(context
, *args
):
140 """Returns a sorted list of all files, including untracked files."""
142 ls_files
= git
.ls_files(
144 z
=True, cached
=True, others
=True, exclude_standard
=True, _readonly
=True
146 return sorted([f
for f
in ls_files
.split('\0') if f
])
149 class _current_branch(object):
150 """Cache for current_branch()"""
157 _current_branch
.key
= None
160 def current_branch(context
):
161 """Return the current branch"""
163 head
= git
.git_path('HEAD')
165 key
= core
.stat(head
).st_mtime
166 if _current_branch
.key
== key
:
167 return _current_branch
.value
169 # OSError means we can't use the stat cache
172 status
, data
, _
= git
.rev_parse('HEAD', symbolic_full_name
=True, _readonly
=True)
174 # git init -- read .git/HEAD. We could do this unconditionally...
175 data
= _read_git_head(context
, head
)
177 for refs_prefix
in ('refs/heads/', 'refs/remotes/', 'refs/tags/'):
178 if data
.startswith(refs_prefix
):
179 value
= data
[len(refs_prefix
) :]
180 _current_branch
.key
= key
181 _current_branch
.value
= value
187 def _read_git_head(context
, head
, default
='main'):
188 """Pure-python .git/HEAD reader"""
189 # Common .git/HEAD "ref: refs/heads/main" files
191 islink
= core
.islink(head
)
192 if core
.isfile(head
) and not islink
:
193 data
= core
.read(head
).rstrip()
195 if data
.startswith(ref_prefix
):
196 return data
[len(ref_prefix
) :]
199 # Legacy .git/HEAD symlinks
201 refs_heads
= core
.realpath(git
.git_path('refs', 'heads'))
202 path
= core
.abspath(head
).replace('\\', '/')
203 if path
.startswith(refs_heads
+ '/'):
204 return path
[len(refs_heads
) + 1 :]
209 def branch_list(context
, remote
=False):
211 Return a list of local or remote branches
213 This explicitly removes HEAD from the list of remote branches.
217 return for_each_ref_basename(context
, 'refs/remotes')
218 return for_each_ref_basename(context
, 'refs/heads')
221 def _version_sort(context
, key
='version:refname'):
222 if version
.check_git(context
, 'version-sort'):
229 def for_each_ref_basename(context
, refs
):
230 """Return refs starting with 'refs'."""
232 sort
= _version_sort(context
)
233 _
, out
, _
= git
.for_each_ref(refs
, format
='%(refname)', sort
=sort
, _readonly
=True)
234 output
= out
.splitlines()
235 non_heads
= [x
for x
in output
if not x
.endswith('/HEAD')]
236 offset
= len(refs
) + 1
237 return [x
[offset
:] for x
in non_heads
]
241 return (x
, len(x
) + 1, y
)
244 def all_refs(context
, split
=False, sort_key
='version:refname'):
245 """Return a tuple of (local branches, remote branches, tags)."""
252 triple('refs/tags', tags
),
253 triple('refs/heads', local_branches
),
254 triple('refs/remotes', remote_branches
),
256 sort
= _version_sort(context
, key
=sort_key
)
257 _
, out
, _
= git
.for_each_ref(format
='%(refname)', sort
=sort
, _readonly
=True)
258 for ref
in out
.splitlines():
259 for prefix
, prefix_len
, dst
in query
:
260 if ref
.startswith(prefix
) and not ref
.endswith('/HEAD'):
261 dst
.append(ref
[prefix_len
:])
265 return local_branches
, remote_branches
, tags
266 return local_branches
+ remote_branches
+ tags
269 def tracked_branch(context
, branch
=None):
270 """Return the remote branch associated with 'branch'."""
272 branch
= current_branch(context
)
276 remote
= config
.get('branch.%s.remote' % branch
)
279 merge_ref
= config
.get('branch.%s.merge' % branch
)
282 refs_heads
= 'refs/heads/'
283 if merge_ref
.startswith(refs_heads
):
284 return remote
+ '/' + merge_ref
[len(refs_heads
) :]
288 def parse_remote_branch(branch
):
289 """Split a remote branch apart into (remote, name) components"""
290 rgx
= re
.compile(r
'^(?P<remote>[^/]+)/(?P<branch>.+)$')
291 match
= rgx
.match(branch
)
295 remote
= match
.group('remote')
296 branch
= match
.group('branch')
297 return (remote
, branch
)
300 def untracked_files(context
, paths
=None, **kwargs
):
301 """Returns a sorted list of untracked files."""
305 args
= ['--'] + paths
307 z
=True, others
=True, exclude_standard
=True, _readonly
=True, *args
, **kwargs
310 return out
[:-1].split('\0')
314 def tag_list(context
):
315 """Return a list of tags."""
316 result
= for_each_ref_basename(context
, 'refs/tags')
321 def log(git
, *args
, **kwargs
):
324 no_abbrev_commit
=True,
332 def commit_diff(context
, oid
):
334 return log(git
, '-1', oid
, '--') + '\n\n' + oid_diff(context
, oid
)
def update_diff_overrides(space_at_eol, space_change, all_space, function_context):
    """Record diff option overrides in the module-level `_diff_overrides`

    Each argument is stored unchanged under its option key:
    `ignore_space_at_eol`, `ignore_space_change`, `ignore_all_space`
    and `function_context`.  Existing values for those keys are replaced.
    """
    settings = (
        ('ignore_space_at_eol', space_at_eol),
        ('ignore_space_change', space_change),
        ('ignore_all_space', all_space),
        ('function_context', function_context),
    )
    for option, value in settings:
        _diff_overrides[option] = value
347 def common_diff_opts(context
):
349 # Default to --patience when diff.algorithm is unset
350 patience
= not config
.get('diff.algorithm', default
='')
351 submodule
= version
.check_git(context
, 'diff-submodule')
353 'patience': patience
,
354 'submodule': submodule
,
357 'unified': config
.get('gui.diffcontext', default
=3),
360 opts
.update(_diff_overrides
)
364 def _add_filename(args
, filename
):
366 args
.extend(['--', filename
])
369 def oid_diff(context
, oid
, filename
=None):
370 """Return the diff for an oid"""
371 # Naively "$oid^!" is what we'd like to use but that doesn't
372 # give the correct result for merges--the diff is reversed.
373 # Be explicit and compare oid against its first parent.
375 args
= [oid
+ '~', oid
]
376 opts
= common_diff_opts(context
)
377 _add_filename(args
, filename
)
378 status
, out
, _
= git
.diff(*args
, **opts
)
380 # We probably don't have "$oid~" because this is the root commit.
381 # "git show" is clever enough to handle the root commit.
383 _add_filename(args
, filename
)
384 _
, out
, _
= git
.show(pretty
='format:', _readonly
=True, *args
, **opts
)
389 def diff_info(context
, oid
, filename
=None):
391 decoded
= log(git
, '-1', oid
, '--', pretty
='format:%b').strip()
394 return decoded
+ oid_diff(context
, oid
, filename
=filename
)
397 # pylint: disable=too-many-arguments
408 with_diff_header
=False,
409 suppress_header
=True,
413 "Invokes git diff on a filepath."
417 ref
, endref
= commit
+ '^', commit
420 argv
.append('%s..%s' % (ref
, endref
))
422 argv
.extend(utils
.shell_split(ref
.strip()))
423 elif head
and amending
and cached
:
428 argv
.append('--no-index')
429 argv
.append(os
.devnull
)
430 argv
.append(filename
)
433 if isinstance(filename
, (list, tuple)):
434 argv
.extend(filename
)
436 argv
.append(filename
)
437 encoding
= cfg
.file_encoding(filename
)
439 status
, out
, _
= git
.diff(
445 **common_diff_opts(context
)
448 success
= status
== 0
450 # Diff will return 1 when comparing untracked file and it has change,
451 # therefore we will check for diff header from output to differentiate
452 # from actual error such as file not found.
453 if untracked
and status
== 1:
455 _
, second
, _
= out
.split('\n', 2)
458 success
= second
.startswith('new file mode ')
466 result
= extract_diff_header(deleted
, with_diff_header
, suppress_header
, out
)
467 return core
.UStr(result
, out
.encoding
)
470 def extract_diff_header(deleted
, with_diff_header
, suppress_header
, diffoutput
):
471 """Split a diff into a header section and payload section"""
473 if diffoutput
.startswith('Submodule'):
475 return ('', diffoutput
)
479 del_tag
= 'deleted file mode '
484 for line
in diffoutput
.split('\n'):
485 if not start
and line
[:2] == '@@' and '@@' in line
[2:]:
487 if start
or (deleted
and del_tag
in line
):
488 output
.write(line
+ '\n')
491 headers
.write(line
+ '\n')
492 elif not suppress_header
:
493 output
.write(line
+ '\n')
495 output_text
= output
.getvalue()
498 headers_text
= headers
.getvalue()
502 return (headers_text
, output_text
)
506 def format_patchsets(context
, to_export
, revs
, output
='patches'):
508 Group contiguous revision selection into patchsets
510 Exists to handle multi-selection.
511 Multiple disparate ranges in the revision selection
512 are grouped into continuous lists.
519 cur_rev
= to_export
[0]
520 cur_rev_idx
= revs
.index(cur_rev
)
522 patches_to_export
= [[cur_rev
]]
525 # Group the patches into continuous sets
526 for rev
in to_export
[1:]:
527 # Limit the search to the current neighborhood for efficiency
529 rev_idx
= revs
[cur_rev_idx
:].index(rev
)
530 rev_idx
+= cur_rev_idx
532 rev_idx
= revs
.index(rev
)
534 if rev_idx
== cur_rev_idx
+ 1:
535 patches_to_export
[patchset_idx
].append(rev
)
538 patches_to_export
.append([rev
])
539 cur_rev_idx
= rev_idx
542 # Export each patchset
544 for patchset
in patches_to_export
:
545 stat
, out
, err
= export_patchset(
552 patch_with_stat
=True,
557 status
= max(stat
, status
)
558 return (status
, '\n'.join(outs
), '\n'.join(errs
))
561 def export_patchset(context
, start
, end
, output
='patches', **kwargs
):
562 """Export patches from start^ to end."""
564 return git
.format_patch('-o', output
, start
+ '^..' + end
, **kwargs
)
def reset_paths(context, head, items):
    """Run "git reset" on `items` while preventing argument overflow

    Duplicate entries in `items` are dropped (so their order is not
    preserved), then the paths are handed to utils.slice_fn() together with
    a callback that calls context.git.reset(head, '--', *paths) for the
    paths it is given.

    Returns the (status, out, err) tuple produced by utils.slice_fn().
    """
    unique_paths = list(set(items))
    reset = context.git.reset

    def reset_chunk(paths):
        return reset(head, '--', *paths)

    status, out, err = utils.slice_fn(unique_paths, reset_chunk)
    return (status, out, err)
575 def unstage_paths(context
, args
, head
='HEAD'):
576 """Unstage paths while accounting for git init"""
577 status
, out
, err
= reset_paths(context
, head
, args
)
579 # handle git init: we have to use 'git rm --cached'
580 # detect this condition by checking if the file is still staged
581 return untrack_paths(context
, args
)
582 return (status
, out
, err
)
585 def untrack_paths(context
, args
):
587 return (-1, N_('Nothing to do'), '')
589 return git
.update_index('--', force_remove
=True, *set(args
))
593 context
, head
='HEAD', update_index
=False, display_untracked
=True, paths
=None
595 """Return a dict of files in various states of being
597 :rtype: dict, keys are staged, unstaged, untracked, unmerged,
598 changed_upstream, and submodule.
603 git
.update_index(refresh
=True)
605 staged
, unmerged
, staged_deleted
, staged_submods
= diff_index(
606 context
, head
, paths
=paths
608 modified
, unstaged_deleted
, modified_submods
= diff_worktree(context
, paths
)
609 if display_untracked
:
610 untracked
= untracked_files(context
, paths
=paths
)
614 # Remove unmerged paths from the modified list
616 unmerged_set
= set(unmerged
)
617 modified
= [path
for path
in modified
if path
not in unmerged_set
]
619 # Look for upstream modified files if this is a tracking branch
620 upstream_changed
= diff_upstream(context
, head
)
627 upstream_changed
.sort()
631 'modified': modified
,
632 'unmerged': unmerged
,
633 'untracked': untracked
,
634 'upstream_changed': upstream_changed
,
635 'staged_deleted': staged_deleted
,
636 'unstaged_deleted': unstaged_deleted
,
637 'submodules': staged_submods | modified_submods
,
641 def _parse_raw_diff(out
):
643 info
, path
, out
= out
.split('\0', 2)
645 is_submodule
= '160000' in info
[1:14]
646 yield (path
, status
, is_submodule
)
649 def diff_index(context
, head
, cached
=True, paths
=None):
658 args
= [head
, '--'] + paths
659 status
, out
, _
= git
.diff_index(cached
=cached
, z
=True, _readonly
=True, *args
)
662 args
[0] = EMPTY_TREE_OID
663 status
, out
, _
= git
.diff_index(cached
=cached
, z
=True, _readonly
=True, *args
)
665 for path
, status
, is_submodule
in _parse_raw_diff(out
):
673 unmerged
.append(path
)
675 return staged
, unmerged
, deleted
, submodules
678 def diff_worktree(context
, paths
=None):
686 args
= ['--'] + paths
687 status
, out
, _
= git
.diff_files(z
=True, _readonly
=True, *args
)
688 for path
, status
, is_submodule
in _parse_raw_diff(out
):
692 modified
.append(path
)
696 return modified
, deleted
, submodules
699 def diff_upstream(context
, head
):
700 """Given `ref`, return $(git merge-base ref HEAD)..ref."""
701 tracked
= tracked_branch(context
)
704 base
= merge_base(context
, head
, tracked
)
705 return diff_filenames(context
, base
, tracked
)
708 def list_submodule(context
):
709 """Return submodules in the format(state, sha1, path, describe)"""
711 status
, data
, _
= git
.submodule('status')
713 if status
== 0 and data
:
714 data
= data
.splitlines()
715 # see git submodule status
716 # TODO better separation
718 state
= line
[0].strip()
719 sha1
= line
[1 : OID_LENGTH
+ 1]
720 left_bracket
= line
.find('(', OID_LENGTH
+ 3)
721 if left_bracket
== -1:
722 left_bracket
= len(line
) + 1
723 path
= line
[OID_LENGTH
+ 2 : left_bracket
- 1]
724 describe
= line
[left_bracket
+ 1 : -1]
725 ret
.append((state
, sha1
, path
, describe
))
729 def merge_base(context
, head
, ref
):
730 """Return the merge-base of head and ref"""
732 return git
.merge_base(head
, ref
, _readonly
=True)[STDOUT
]
735 def merge_base_parent(context
, branch
):
736 tracked
= tracked_branch(context
, branch
=branch
)
743 def parse_ls_tree(context
, rev
):
744 """Return a list of (mode, type, oid, path) tuples."""
747 lines
= git
.ls_tree(rev
, r
=True, _readonly
=True)[STDOUT
].splitlines()
748 regex
= re
.compile(r
'^(\d+)\W(\w+)\W(\w+)[ \t]+(.*)$')
750 match
= regex
.match(line
)
752 mode
= match
.group(1)
753 objtype
= match
.group(2)
755 filename
= match
.group(4)
768 def ls_tree(context
, path
, ref
='HEAD'):
769 """Return a parsed git ls-tree result for a single directory"""
772 status
, out
, _
= git
.ls_tree(
773 ref
, '--', path
, z
=True, full_tree
=True, _readonly
=True
775 if status
== 0 and out
:
776 path_offset
= 6 + 1 + 4 + 1 + OID_LENGTH
+ 1
777 for line
in out
[:-1].split('\0'):
779 # .....6 ...4 ......................................40
780 # 040000 tree c127cde9a0c644a3a8fef449a244f47d5272dfa6 relative
781 # 100644 blob 139e42bf4acaa4927ec9be1ec55a252b97d3f1e2 relative/path
782 # 0..... 7... 12...................................... 53
783 # path_offset = 6 + 1 + 4 + 1 + OID_LENGTH(40) + 1
785 relpath
= line
[path_offset
:]
786 result
.append((objtype
, relpath
))
791 # A regex for matching the output of git(log|rev-list) --pretty=oneline
792 REV_LIST_REGEX
= re
.compile(r
'^([0-9a-f]{40}) (.*)$')
795 def parse_rev_list(raw_revs
):
796 """Parse `git log --pretty=oneline` output into (oid, summary) pairs."""
798 for line
in raw_revs
.splitlines():
799 match
= REV_LIST_REGEX
.match(line
)
801 rev_id
= match
.group(1)
802 summary
= match
.group(2)
812 # pylint: disable=redefined-builtin
813 def log_helper(context
, all
=False, extra_args
=None):
814 """Return parallel arrays containing oids and summaries."""
821 output
= log(git
, pretty
='oneline', all
=all
, *args
)
822 for line
in output
.splitlines():
823 match
= REV_LIST_REGEX
.match(line
)
825 revs
.append(match
.group(1))
826 summaries
.append(match
.group(2))
827 return (revs
, summaries
)
830 def rev_list_range(context
, start
, end
):
831 """Return (oid, summary) pairs between start and end."""
833 revrange
= '%s..%s' % (start
, end
)
834 out
= git
.rev_list(revrange
, pretty
='oneline', _readonly
=True)[STDOUT
]
835 return parse_rev_list(out
)
838 def commit_message_path(context
):
839 """Return the path to .git/GIT_COLA_MSG"""
841 path
= git
.git_path('GIT_COLA_MSG')
842 if core
.exists(path
):
847 def merge_message_path(context
):
848 """Return the path to .git/MERGE_MSG or .git/SQUASH_MSG."""
850 for basename
in ('MERGE_MSG', 'SQUASH_MSG'):
851 path
= git
.git_path(basename
)
852 if core
.exists(path
):
857 def prepare_commit_message_hook(context
):
858 """Run the cola.preparecommitmessagehook to prepare the commit message"""
860 default_hook
= config
.hooks_path('cola-prepare-commit-msg')
861 return config
.get('cola.preparecommitmessagehook', default
=default_hook
)
864 def abort_merge(context
):
865 """Abort a merge by reading the tree at HEAD."""
868 status
, out
, err
= git
.read_tree('HEAD', reset
=True, u
=True, v
=True, _readonly
=True)
870 merge_head
= git
.git_path('MERGE_HEAD')
871 if core
.exists(merge_head
):
872 core
.unlink(merge_head
)
873 # remove MERGE_MESSAGE, etc.
874 merge_msg_path
= merge_message_path(context
)
875 while merge_msg_path
:
876 core
.unlink(merge_msg_path
)
877 merge_msg_path
= merge_message_path(context
)
878 return status
, out
, err
def strip_remote(remotes, remote_branch):
    """Return `remote_branch` with its leading remote name removed

    The first entry of `remotes` whose name followed by "/" is a prefix of
    `remote_branch` is stripped.  When no entry matches, everything up to and
    including the first "/" is dropped instead; a name that contains no "/"
    is returned unchanged.
    """
    matched = next(
        (
            candidate
            for candidate in (name + '/' for name in remotes)
            if remote_branch.startswith(candidate)
        ),
        None,
    )
    if matched is not None:
        return remote_branch[len(matched) :]
    # No known remote matched: fall back to cutting at the first slash.
    _, slash, remainder = remote_branch.partition('/')
    return remainder if slash else remote_branch
889 def parse_refs(context
, argv
):
890 """Parse command-line arguments into object IDs"""
892 status
, out
, _
= git
.rev_parse(_readonly
=True, *argv
)
894 oids
= [oid
for oid
in out
.splitlines() if oid
]
900 def prev_commitmsg(context
, *args
):
901 """Queries git for the latest commit message."""
904 '-1', no_color
=True, pretty
='format:%s%n%n%b', _readonly
=True, *args
908 def rev_parse(context
, name
):
909 """Call git rev-parse and return the output"""
911 status
, out
, _
= git
.rev_parse(name
, _readonly
=True)
def write_blob(context, oid, filename):
    """Write a blob to a temporary file and return the path

    Modern versions of Git allow invoking filters, in which case the blob is
    written through cat_file_to_path().  Older versions get the object
    content as-is through cat_file_blob().

    """
    filters_supported = version.check_git(context, 'cat-file-filters-path')
    writer = cat_file_to_path if filters_supported else cat_file_blob
    return writer(context, filename, oid)
def cat_file_blob(context, filename, oid):
    """Redirect "git cat-file blob <oid>" output to a path via cat_file()

    'blob' and `oid` are forwarded as the positional cat-file arguments;
    `filename` is passed along for cat_file() to name its output.
    """
    object_type = 'blob'
    return cat_file(context, filename, object_type, oid)
def cat_file_to_path(context, filename, oid):
    """Redirect "git cat-file" output for `oid` to a path with filters on

    Forwards path=filename and filters=True to cat_file(), presumably so
    that git applies the filters configured for that path.
    """
    options = dict(path=filename, filters=True)
    return cat_file(context, filename, oid, **options)
939 def cat_file(context
, filename
, *args
, **kwargs
):
940 """Redirect git cat-file output to a path"""
943 # Use the original filename in the suffix so that the generated filename
944 # has the correct extension, and so that it resembles the original name.
945 basename
= os
.path
.basename(filename
)
946 suffix
= '-' + basename
# ensures the correct filename extension
947 path
= utils
.tmp_filename('blob', suffix
=suffix
)
948 with
open(path
, 'wb') as fp
:
949 status
, out
, err
= git
.cat_file(
950 _raw
=True, _readonly
=True, _stdout
=fp
, *args
, **kwargs
952 Interaction
.command(N_('Error'), 'git cat-file', status
, out
, err
)
def write_blob_path(context, head, oid, filename):
    """Write the content of `filename` to a path, preferring write_blob()

    When git supports 'cat-file-filters-path' the blob is written through
    write_blob() using `oid`.  Otherwise the "<head>:<filename>" revision
    path is handed to cat_file_blob() and `oid` is unused.
    """
    if not version.check_git(context, 'cat-file-filters-path'):
        # Fallback: address the blob by revision and path instead of by oid.
        blob_spec = head + ':' + filename
        return cat_file_blob(context, filename, blob_spec)
    return write_blob(context, oid, filename)
967 def annex_path(context
, head
, filename
):
968 """Return the git-annex path for a filename at the specified commit"""
973 # unfortunately there's no way to filter this down to a single path
974 # so we just have to scan all reported paths
975 status
, out
, _
= git
.annex('findref', '--json', head
, _readonly
=True)
977 for line
in out
.splitlines():
978 info
= json
.loads(line
)
980 annex_file
= info
['file']
981 except (ValueError, KeyError):
983 # we only care about this file so we can skip the rest
984 if annex_file
== filename
:
987 key
= annex_info
.get('key', '')
989 status
, out
, _
= git
.annex('contentlocation', key
, _readonly
=True)
990 if status
== 0 and os
.path
.exists(out
):
996 def is_binary(context
, filename
):
997 cfg_is_binary
= context
.cfg
.is_binary(filename
)
998 if cfg_is_binary
is not None:
1000 # This is the same heuristic as xdiff-interface.c:buffer_is_binary().
1003 result
= core
.read(filename
, size
=size
, encoding
='bytes')
1004 except (IOError, OSError):
1007 return b
'\0' in result
1010 def is_valid_ref(context
, ref
):
1011 """Is the provided Git ref a valid refname?"""
1012 status
, _
, _
= context
.git
.rev_parse(ref
, quiet
=True, verify
=True, _readonly
=True)