1 """Git commands and queries for Git"""
2 from __future__
import division
, absolute_import
, unicode_literals
6 from io
import StringIO
11 from .git
import STDOUT
12 from .git
import EMPTY_TREE_OID
13 from .git
import OID_LENGTH
15 from .interaction
import Interaction
18 class InvalidRepositoryError(Exception):
22 def add(context
, items
, u
=False):
23 """Run "git add" while preventing argument overflow"""
25 return utils
.slice_fn(
26 items
, lambda paths
: fn('--', force
=True, verbose
=True, u
=u
, *paths
)
30 def apply_diff(context
, filename
):
32 return git
.apply(filename
, index
=True, cached
=True)
35 def apply_diff_to_worktree(context
, filename
):
37 return git
.apply(filename
)
40 def get_branch(context
, branch
):
42 branch
= current_branch(context
)
46 def upstream_remote(context
, branch
=None):
47 """Return the remote associated with the specified branch"""
49 branch
= get_branch(context
, branch
)
50 return config
.get('branch.%s.remote' % branch
)
53 def remote_url(context
, remote
, push
=False):
54 """Return the URL for the specified remote"""
56 url
= config
.get('remote.%s.url' % remote
, '')
58 url
= config
.get('remote.%s.pushurl' % remote
, url
)
62 def diff_index_filenames(context
, ref
):
64 Return a diff of filenames that have been modified relative to the index
67 out
= git
.diff_index(ref
, name_only
=True, z
=True)[STDOUT
]
68 return _parse_diff_filenames(out
)
71 def diff_filenames(context
, *args
):
72 """Return a list of filenames that have been modified"""
75 name_only
=True, no_commit_id
=True, r
=True, z
=True, _readonly
=True, *args
77 return _parse_diff_filenames(out
)
80 def listdir(context
, dirname
, ref
='HEAD'):
81 """Get the contents of a directory according to Git
83 Query Git for the content of a directory, taking ignored
90 # first, parse git ls-tree to get the tracked files
91 # in a list of (type, path) tuples
92 entries
= ls_tree(context
, dirname
, ref
=ref
)
94 if entry
[0][0] == 't': # tree
97 files
.append(entry
[1])
99 # gather untracked files
100 untracked
= untracked_files(context
, paths
=[dirname
], directory
=True)
101 for path
in untracked
:
102 if path
.endswith('/'):
103 dirs
.append(path
[:-1])
113 def diff(context
, args
):
114 """Return a list of filenames for the given diff arguments
116 :param args: list of arguments to pass to "git diff --name-only"
120 out
= git
.diff(name_only
=True, z
=True, *args
)[STDOUT
]
121 return _parse_diff_filenames(out
)
124 def _parse_diff_filenames(out
):
126 return out
[:-1].split('\0')
130 def tracked_files(context
, *args
):
131 """Return the names of all files in the repository"""
133 out
= git
.ls_files('--', *args
, z
=True)[STDOUT
]
135 return sorted(out
[:-1].split('\0'))
139 def all_files(context
, *args
):
140 """Returns a sorted list of all files, including untracked files."""
142 ls_files
= git
.ls_files(
143 '--', *args
, z
=True, cached
=True, others
=True, exclude_standard
=True
145 return sorted([f
for f
in ls_files
.split('\0') if f
])
148 class _current_branch(object):
149 """Cache for current_branch()"""
156 _current_branch
.key
= None
159 def current_branch(context
):
160 """Return the current branch"""
162 head
= git
.git_path('HEAD')
164 key
= core
.stat(head
).st_mtime
165 if _current_branch
.key
== key
:
166 return _current_branch
.value
168 # OSError means we can't use the stat cache
171 status
, data
, _
= git
.rev_parse('HEAD', symbolic_full_name
=True)
173 # git init -- read .git/HEAD. We could do this unconditionally...
174 data
= _read_git_head(context
, head
)
176 for refs_prefix
in ('refs/heads/', 'refs/remotes/', 'refs/tags/'):
177 if data
.startswith(refs_prefix
):
178 value
= data
[len(refs_prefix
) :]
179 _current_branch
.key
= key
180 _current_branch
.value
= value
186 def _read_git_head(context
, head
, default
='main'):
187 """Pure-python .git/HEAD reader"""
188 # Common .git/HEAD "ref: refs/heads/main" files
190 islink
= core
.islink(head
)
191 if core
.isfile(head
) and not islink
:
192 data
= core
.read(head
).rstrip()
194 if data
.startswith(ref_prefix
):
195 return data
[len(ref_prefix
) :]
198 # Legacy .git/HEAD symlinks
200 refs_heads
= core
.realpath(git
.git_path('refs', 'heads'))
201 path
= core
.abspath(head
).replace('\\', '/')
202 if path
.startswith(refs_heads
+ '/'):
203 return path
[len(refs_heads
) + 1 :]
208 def branch_list(context
, remote
=False):
210 Return a list of local or remote branches
212 This explicitly removes HEAD from the list of remote branches.
216 return for_each_ref_basename(context
, 'refs/remotes')
217 return for_each_ref_basename(context
, 'refs/heads')
220 def _version_sort(context
, key
='version:refname'):
221 if version
.check_git(context
, 'version-sort'):
228 def for_each_ref_basename(context
, refs
):
229 """Return refs starting with 'refs'."""
231 sort
= _version_sort(context
)
232 _
, out
, _
= git
.for_each_ref(refs
, format
='%(refname)', sort
=sort
, _readonly
=True)
233 output
= out
.splitlines()
234 non_heads
= [x
for x
in output
if not x
.endswith('/HEAD')]
235 offset
= len(refs
) + 1
236 return [x
[offset
:] for x
in non_heads
]
240 return (x
, len(x
) + 1, y
)
243 def all_refs(context
, split
=False, sort_key
='version:refname'):
244 """Return a tuple of (local branches, remote branches, tags)."""
251 triple('refs/tags', tags
),
252 triple('refs/heads', local_branches
),
253 triple('refs/remotes', remote_branches
),
255 sort
= _version_sort(context
, key
=sort_key
)
256 _
, out
, _
= git
.for_each_ref(format
='%(refname)', sort
=sort
, _readonly
=True)
257 for ref
in out
.splitlines():
258 for prefix
, prefix_len
, dst
in query
:
259 if ref
.startswith(prefix
) and not ref
.endswith('/HEAD'):
260 dst
.append(ref
[prefix_len
:])
264 return local_branches
, remote_branches
, tags
265 return local_branches
+ remote_branches
+ tags
268 def tracked_branch(context
, branch
=None):
269 """Return the remote branch associated with 'branch'."""
271 branch
= current_branch(context
)
275 remote
= config
.get('branch.%s.remote' % branch
)
278 merge_ref
= config
.get('branch.%s.merge' % branch
)
281 refs_heads
= 'refs/heads/'
282 if merge_ref
.startswith(refs_heads
):
283 return remote
+ '/' + merge_ref
[len(refs_heads
) :]
287 def parse_remote_branch(branch
):
288 """Split a remote branch apart into (remote, name) components"""
289 rgx
= re
.compile(r
'^(?P<remote>[^/]+)/(?P<branch>.+)$')
290 match
= rgx
.match(branch
)
294 remote
= match
.group('remote')
295 branch
= match
.group('branch')
296 return (remote
, branch
)
299 def untracked_files(context
, paths
=None, **kwargs
):
300 """Returns a sorted list of untracked files."""
304 args
= ['--'] + paths
305 out
= git
.ls_files(z
=True, others
=True, exclude_standard
=True, *args
, **kwargs
)[
309 return out
[:-1].split('\0')
313 def tag_list(context
):
314 """Return a list of tags."""
315 result
= for_each_ref_basename(context
, 'refs/tags')
320 def log(git
, *args
, **kwargs
):
323 no_abbrev_commit
=True,
331 def commit_diff(context
, oid
):
333 return log(git
, '-1', oid
, '--') + '\n\n' + oid_diff(context
, oid
)
def update_diff_overrides(space_at_eol, space_change, all_space, function_context):
    """Record the whitespace and function-context diff option overrides

    The values are stored in the module-level `_diff_overrides` mapping
    under the 'ignore_space_at_eol', 'ignore_space_change',
    'ignore_all_space' and 'function_context' keys.
    """
    _diff_overrides.update(
        {
            'ignore_space_at_eol': space_at_eol,
            'ignore_space_change': space_change,
            'ignore_all_space': all_space,
            'function_context': function_context,
        }
    )
346 def common_diff_opts(context
):
348 # Default to --patience when diff.algorithm is unset
349 patience
= not config
.get('diff.algorithm', default
='')
350 submodule
= version
.check_git(context
, 'diff-submodule')
352 'patience': patience
,
353 'submodule': submodule
,
356 'unified': config
.get('gui.diffcontext', default
=3),
359 opts
.update(_diff_overrides
)
363 def _add_filename(args
, filename
):
365 args
.extend(['--', filename
])
368 def oid_diff(context
, oid
, filename
=None):
369 """Return the diff for an oid"""
370 # Naively "$oid^!" is what we'd like to use but that doesn't
371 # give the correct result for merges--the diff is reversed.
372 # Be explicit and compare oid against its first parent.
374 args
= [oid
+ '~', oid
]
375 opts
= common_diff_opts(context
)
376 _add_filename(args
, filename
)
377 status
, out
, _
= git
.diff(*args
, **opts
)
379 # We probably don't have "$oid~" because this is the root commit.
380 # "git show" is clever enough to handle the root commit.
382 _add_filename(args
, filename
)
383 _
, out
, _
= git
.show(pretty
='format:', _readonly
=True, *args
, **opts
)
388 def diff_info(context
, oid
, filename
=None):
390 decoded
= log(git
, '-1', oid
, '--', pretty
='format:%b').strip()
393 return decoded
+ oid_diff(context
, oid
, filename
=filename
)
396 # pylint: disable=too-many-arguments
407 with_diff_header
=False,
408 suppress_header
=True,
412 "Invokes git diff on a filepath."
416 ref
, endref
= commit
+ '^', commit
419 argv
.append('%s..%s' % (ref
, endref
))
421 argv
.extend(context
, utils
.shell_split(ref
.strip()))
422 elif head
and amending
and cached
:
427 argv
.append('--no-index')
428 argv
.append(os
.devnull
)
429 argv
.append(filename
)
432 if isinstance(filename
, (list, tuple)):
433 argv
.extend(filename
)
435 argv
.append(filename
)
436 encoding
= cfg
.file_encoding(filename
)
438 status
, out
, _
= git
.diff(
444 **common_diff_opts(context
)
447 success
= status
== 0
449 # Diff will return 1 when comparing untracked file and it has change,
450 # therefore we will check for diff header from output to differentiate
451 # from actual error such as file not found.
452 if untracked
and status
== 1:
454 _
, second
, _
= out
.split('\n', 2)
457 success
= second
.startswith('new file mode ')
465 result
= extract_diff_header(deleted
, with_diff_header
, suppress_header
, out
)
466 return core
.UStr(result
, out
.encoding
)
469 def extract_diff_header(deleted
, with_diff_header
, suppress_header
, diffoutput
):
470 """Split a diff into a header section and payload section"""
472 if diffoutput
.startswith('Submodule'):
474 return ('', diffoutput
)
478 del_tag
= 'deleted file mode '
483 for line
in diffoutput
.split('\n'):
484 if not start
and line
[:2] == '@@' and '@@' in line
[2:]:
486 if start
or (deleted
and del_tag
in line
):
487 output
.write(line
+ '\n')
490 headers
.write(line
+ '\n')
491 elif not suppress_header
:
492 output
.write(line
+ '\n')
494 output_text
= output
.getvalue()
497 headers_text
= headers
.getvalue()
501 return (headers_text
, output_text
)
505 def format_patchsets(context
, to_export
, revs
, output
='patches'):
507 Group contiguous revision selection into patchsets
509 Exists to handle multi-selection.
510 Multiple disparate ranges in the revision selection
511 are grouped into continuous lists.
518 cur_rev
= to_export
[0]
519 cur_rev_idx
= revs
.index(cur_rev
)
521 patches_to_export
= [[cur_rev
]]
524 # Group the patches into continuous sets
525 for rev
in to_export
[1:]:
526 # Limit the search to the current neighborhood for efficiency
528 rev_idx
= revs
[cur_rev_idx
:].index(rev
)
529 rev_idx
+= cur_rev_idx
531 rev_idx
= revs
.index(rev
)
533 if rev_idx
== cur_rev_idx
+ 1:
534 patches_to_export
[patchset_idx
].append(rev
)
537 patches_to_export
.append([rev
])
538 cur_rev_idx
= rev_idx
541 # Export each patchset
543 for patchset
in patches_to_export
:
544 stat
, out
, err
= export_patchset(
551 patch_with_stat
=True,
556 status
= max(stat
, status
)
557 return (status
, '\n'.join(outs
), '\n'.join(errs
))
560 def export_patchset(context
, start
, end
, output
='patches', **kwargs
):
561 """Export patches from start^ to end."""
563 return git
.format_patch('-o', output
, start
+ '^..' + end
, **kwargs
)
def reset_paths(context, head, items):
    """Run "git reset" on `items` in slices to avoid argument overflow

    Duplicate entries in `items` are dropped before the paths are handed
    to utils.slice_fn(), which invokes "git reset <head> -- <paths>" for
    each slice.  Returns a (status, out, err) tuple.
    """
    unique_paths = list(set(items))
    git_reset = context.git.reset

    def reset_slice(paths):
        # "--" keeps path names from being interpreted as revisions.
        return git_reset(head, '--', *paths)

    status, out, err = utils.slice_fn(unique_paths, reset_slice)
    return (status, out, err)
574 def unstage_paths(context
, args
, head
='HEAD'):
575 """Unstage paths while accounting for git init"""
576 status
, out
, err
= reset_paths(context
, head
, args
)
578 # handle git init: we have to use 'git rm --cached'
579 # detect this condition by checking if the file is still staged
580 return untrack_paths(context
, args
)
581 return (status
, out
, err
)
584 def untrack_paths(context
, args
):
586 return (-1, N_('Nothing to do'), '')
588 return git
.update_index('--', force_remove
=True, *set(args
))
592 context
, head
='HEAD', update_index
=False, display_untracked
=True, paths
=None
594 """Return a dict of files in various states of being
596 :rtype: dict, keys are staged, unstaged, untracked, unmerged,
597 changed_upstream, and submodule.
602 git
.update_index(refresh
=True)
604 staged
, unmerged
, staged_deleted
, staged_submods
= diff_index(
605 context
, head
, paths
=paths
607 modified
, unstaged_deleted
, modified_submods
= diff_worktree(context
, paths
)
608 if display_untracked
:
609 untracked
= untracked_files(context
, paths
=paths
)
613 # Remove unmerged paths from the modified list
615 unmerged_set
= set(unmerged
)
616 modified
= [path
for path
in modified
if path
not in unmerged_set
]
618 # Look for upstream modified files if this is a tracking branch
619 upstream_changed
= diff_upstream(context
, head
)
626 upstream_changed
.sort()
630 'modified': modified
,
631 'unmerged': unmerged
,
632 'untracked': untracked
,
633 'upstream_changed': upstream_changed
,
634 'staged_deleted': staged_deleted
,
635 'unstaged_deleted': unstaged_deleted
,
636 'submodules': staged_submods | modified_submods
,
640 def _parse_raw_diff(out
):
642 info
, path
, out
= out
.split('\0', 2)
644 is_submodule
= '160000' in info
[1:14]
645 yield (path
, status
, is_submodule
)
648 def diff_index(context
, head
, cached
=True, paths
=None):
657 args
= [head
, '--'] + paths
658 status
, out
, _
= git
.diff_index(cached
=cached
, z
=True, *args
)
661 args
[0] = EMPTY_TREE_OID
662 status
, out
, _
= git
.diff_index(cached
=cached
, z
=True, *args
)
664 for path
, status
, is_submodule
in _parse_raw_diff(out
):
672 unmerged
.append(path
)
674 return staged
, unmerged
, deleted
, submodules
677 def diff_worktree(context
, paths
=None):
685 args
= ['--'] + paths
686 status
, out
, _
= git
.diff_files(z
=True, *args
)
687 for path
, status
, is_submodule
in _parse_raw_diff(out
):
691 modified
.append(path
)
695 return modified
, deleted
, submodules
698 def diff_upstream(context
, head
):
699 """Given `ref`, return $(git merge-base ref HEAD)..ref."""
700 tracked
= tracked_branch(context
)
703 base
= merge_base(context
, head
, tracked
)
704 return diff_filenames(context
, base
, tracked
)
707 def list_submodule(context
):
708 """Return submodules in the format(state, sha1, path, describe)"""
710 status
, data
, _
= git
.submodule('status')
712 if status
== 0 and data
:
713 data
= data
.splitlines()
714 # see git submodule status
715 # TODO better separation
717 state
= line
[0].strip()
718 sha1
= line
[1 : OID_LENGTH
+ 1]
719 left_bracket
= line
.find('(', OID_LENGTH
+ 3)
720 if left_bracket
== -1:
721 left_bracket
= len(line
) + 1
722 path
= line
[OID_LENGTH
+ 2 : left_bracket
- 1]
723 describe
= line
[left_bracket
+ 1 : -1]
724 ret
.append((state
, sha1
, path
, describe
))
728 def merge_base(context
, head
, ref
):
729 """Return the merge-base of head and ref"""
731 return git
.merge_base(head
, ref
, _readonly
=True)[STDOUT
]
734 def merge_base_parent(context
, branch
):
735 tracked
= tracked_branch(context
, branch
=branch
)
742 def parse_ls_tree(context
, rev
):
743 """Return a list of (mode, type, oid, path) tuples."""
746 lines
= git
.ls_tree(rev
, r
=True, _readonly
=True)[STDOUT
].splitlines()
747 regex
= re
.compile(r
'^(\d+)\W(\w+)\W(\w+)[ \t]+(.*)$')
749 match
= regex
.match(line
)
751 mode
= match
.group(1)
752 objtype
= match
.group(2)
754 filename
= match
.group(4)
767 def ls_tree(context
, path
, ref
='HEAD'):
768 """Return a parsed git ls-tree result for a single directory"""
771 status
, out
, _
= git
.ls_tree(ref
, '--', path
, z
=True, full_tree
=True)
772 if status
== 0 and out
:
773 path_offset
= 6 + 1 + 4 + 1 + OID_LENGTH
+ 1
774 for line
in out
[:-1].split('\0'):
776 # .....6 ...4 ......................................40
777 # 040000 tree c127cde9a0c644a3a8fef449a244f47d5272dfa6 relative
778 # 100644 blob 139e42bf4acaa4927ec9be1ec55a252b97d3f1e2 relative/path
779 # 0..... 7... 12...................................... 53
780 # path_offset = 6 + 1 + 4 + 1 + OID_LENGTH(40) + 1
782 relpath
= line
[path_offset
:]
783 result
.append((objtype
, relpath
))
788 # A regex for matching the output of git(log|rev-list) --pretty=oneline
789 REV_LIST_REGEX
= re
.compile(r
'^([0-9a-f]{40}) (.*)$')
792 def parse_rev_list(raw_revs
):
793 """Parse `git log --pretty=oneline` output into (oid, summary) pairs."""
795 for line
in raw_revs
.splitlines():
796 match
= REV_LIST_REGEX
.match(line
)
798 rev_id
= match
.group(1)
799 summary
= match
.group(2)
809 # pylint: disable=redefined-builtin
810 def log_helper(context
, all
=False, extra_args
=None):
811 """Return parallel arrays containing oids and summaries."""
818 output
= log(git
, pretty
='oneline', all
=all
, *args
)
819 for line
in output
.splitlines():
820 match
= REV_LIST_REGEX
.match(line
)
822 revs
.append(match
.group(1))
823 summaries
.append(match
.group(2))
824 return (revs
, summaries
)
827 def rev_list_range(context
, start
, end
):
828 """Return (oid, summary) pairs between start and end."""
830 revrange
= '%s..%s' % (start
, end
)
831 out
= git
.rev_list(revrange
, pretty
='oneline')[STDOUT
]
832 return parse_rev_list(out
)
835 def commit_message_path(context
):
836 """Return the path to .git/GIT_COLA_MSG"""
838 path
= git
.git_path('GIT_COLA_MSG')
839 if core
.exists(path
):
844 def merge_message_path(context
):
845 """Return the path to .git/MERGE_MSG or .git/SQUASH_MSG."""
847 for basename
in ('MERGE_MSG', 'SQUASH_MSG'):
848 path
= git
.git_path(basename
)
849 if core
.exists(path
):
854 def prepare_commit_message_hook(context
):
855 """Run the cola.preparecommitmessagehook to prepare the commit message"""
857 default_hook
= config
.hooks_path('cola-prepare-commit-msg')
858 return config
.get('cola.preparecommitmessagehook', default
=default_hook
)
861 def abort_merge(context
):
862 """Abort a merge by reading the tree at HEAD."""
865 status
, out
, err
= git
.read_tree('HEAD', reset
=True, u
=True, v
=True)
867 merge_head
= git
.git_path('MERGE_HEAD')
868 if core
.exists(merge_head
):
869 core
.unlink(merge_head
)
870 # remove MERGE_MESSAGE, etc.
871 merge_msg_path
= merge_message_path(context
)
872 while merge_msg_path
:
873 core
.unlink(merge_msg_path
)
874 merge_msg_path
= merge_message_path(context
)
875 return status
, out
, err
def strip_remote(remotes, remote_branch):
    """Return a remote branch name with its leading remote name removed

    Each name in `remotes` is tried in order as a "<remote>/" prefix and
    the first match is stripped.  When nothing matches, everything up to
    and including the first "/" is dropped; a name that contains no "/"
    is returned unchanged.
    """
    prefixes = (name + '/' for name in remotes)
    offsets = (
        len(prefix) for prefix in prefixes if remote_branch.startswith(prefix)
    )
    offset = next(offsets, None)
    if offset is not None:
        return remote_branch[offset:]
    # Unknown remote: fall back to cutting at the first path separator.
    _, separator, remainder = remote_branch.partition('/')
    return remainder if separator else remote_branch
886 def parse_refs(context
, argv
):
887 """Parse command-line arguments into object IDs"""
889 status
, out
, _
= git
.rev_parse(*argv
)
891 oids
= [oid
for oid
in out
.splitlines() if oid
]
897 def prev_commitmsg(context
, *args
):
898 """Queries git for the latest commit message."""
900 return git
.log('-1', no_color
=True, pretty
='format:%s%n%n%b', *args
)[STDOUT
]
903 def rev_parse(context
, name
):
904 """Call git rev-parse and return the output"""
906 status
, out
, _
= git
.rev_parse(name
)
def write_blob(context, oid, filename):
    """Write a blob to a temporary file and return the path

    Modern versions of Git allow invoking filters.  Older versions
    get the object content as-is.

    """
    has_filters = version.check_git(context, 'cat-file-filters-path')
    if not has_filters:
        # No filter support: fetch the raw blob content.
        return cat_file_blob(context, filename, oid)
    return cat_file_to_path(context, filename, oid)
def cat_file_blob(context, filename, oid):
    """Run cat_file() for `oid` with the object type given as "blob" """
    object_type = 'blob'
    return cat_file(context, filename, object_type, oid)
def cat_file_to_path(context, filename, oid):
    """Run cat_file() for `oid` with the path and filters options set

    `filename` is passed both as the name used for the output file and
    as the "path" option, alongside "filters".
    """
    return cat_file(
        context,
        filename,
        oid,
        path=filename,
        filters=True,
    )
934 def cat_file(context
, filename
, *args
, **kwargs
):
935 """Redirect git cat-file output to a path"""
938 # Use the original filename in the suffix so that the generated filename
939 # has the correct extension, and so that it resembles the original name.
940 basename
= os
.path
.basename(filename
)
941 suffix
= '-' + basename
# ensures the correct filename extension
942 path
= utils
.tmp_filename('blob', suffix
=suffix
)
943 with
open(path
, 'wb') as fp
:
944 status
, out
, err
= git
.cat_file(
945 _raw
=True, _readonly
=True, _stdout
=fp
, *args
, **kwargs
947 Interaction
.command(N_('Error'), 'git cat-file', status
, out
, err
)
def write_blob_path(context, head, oid, filename):
    """Use write_blob() when modern git is available

    Otherwise the blob is looked up through the "<head>:<filename>"
    object name and written using cat_file_blob().
    """
    has_filters = version.check_git(context, 'cat-file-filters-path')
    if not has_filters:
        object_name = head + ':' + filename
        return cat_file_blob(context, filename, object_name)
    return write_blob(context, oid, filename)
962 def annex_path(context
, head
, filename
):
963 """Return the git-annex path for a filename at the specified commit"""
968 # unfortunately there's no way to filter this down to a single path
969 # so we just have to scan all reported paths
970 status
, out
, _
= git
.annex('findref', '--json', head
)
972 for line
in out
.splitlines():
973 info
= json
.loads(line
)
975 annex_file
= info
['file']
976 except (ValueError, KeyError):
978 # we only care about this file so we can skip the rest
979 if annex_file
== filename
:
982 key
= annex_info
.get('key', '')
984 status
, out
, _
= git
.annex('contentlocation', key
)
985 if status
== 0 and os
.path
.exists(out
):
991 def is_binary(context
, filename
):
992 cfg_is_binary
= context
.cfg
.is_binary(filename
)
993 if cfg_is_binary
is not None:
995 # This is the same heuristic as xdiff-interface.c:buffer_is_binary().
998 result
= core
.read(filename
, size
=size
, encoding
='bytes')
999 except (IOError, OSError):
1002 return b
'\0' in result
1005 def is_valid_ref(context
, ref
):
1006 """Is the provided Git ref a valid refname?"""
1007 status
, _
, _
= context
.git
.rev_parse(ref
, quiet
=True, verify
=True)