From 2871f4d447214874e13cf764ab3a170c9d844ca2 Mon Sep 17 00:00:00 2001 From: Jerry Zhang Date: Mon, 24 Oct 2022 20:07:43 +0000 Subject: [PATCH] builtin: patch-id: add --verbatim as a command mode There are situations where the user might not want the default setting where patch-id strips all whitespace. They might be working in a language where white space is syntactically important, or they might have CI testing that enforces strict whitespace linting. In these cases, a whitespace change would result in the patch fundamentally changing, and thus deserving of a different id. Add a new mode that is exclusive of --stable and --unstable called --verbatim. It also corresponds to the config patchid.verbatim = true. In this mode, the stable algorithm is used and whitespace is not stripped from the patch text. Users of --unstable mainly care about compatibility with old git versions, which unstripping the whitespace would break. Thus there isn't a usecase for the combination of --verbatim and --unstable, and we don't expose this so as to not add maintainence burden. Signed-off-by: Jerry Zhang fixes https://github.com/Skydio/revup/issues/2 Signed-off-by: Junio C Hamano --- Documentation/git-patch-id.txt | 24 +++++++++----- builtin/patch-id.c | 73 +++++++++++++++++++++++++++--------------- t/t4204-patch-id.sh | 66 ++++++++++++++++++++++++++++++++++---- 3 files changed, 124 insertions(+), 39 deletions(-) diff --git a/Documentation/git-patch-id.txt b/Documentation/git-patch-id.txt index 442caff8a9..1d15fa45d5 100644 --- a/Documentation/git-patch-id.txt +++ b/Documentation/git-patch-id.txt @@ -8,18 +8,18 @@ git-patch-id - Compute unique ID for a patch SYNOPSIS -------- [verse] -'git patch-id' [--stable | --unstable] +'git patch-id' [--stable | --unstable | --verbatim] DESCRIPTION ----------- Read a patch from the standard input and compute the patch ID for it. A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a -patch, with whitespace and line numbers ignored. As such, it's "reasonably -stable", but at the same time also reasonably unique, i.e., two patches that -have the same "patch ID" are almost guaranteed to be the same thing. +patch, with line numbers ignored. As such, it's "reasonably stable", but at +the same time also reasonably unique, i.e., two patches that have the same +"patch ID" are almost guaranteed to be the same thing. -IOW, you can use this thing to look for likely duplicate commits. +The main usecase for this command is to look for likely duplicate commits. When dealing with 'git diff-tree' output, it takes advantage of the fact that the patch is prefixed with the object name of the @@ -30,6 +30,12 @@ This can be used to make a mapping from patch ID to commit ID. OPTIONS ------- +--verbatim:: + Calculate the patch-id of the input as it is given, do not strip + any whitespace. + + This is the default if patchid.verbatim is true. + --stable:: Use a "stable" sum of hashes as the patch ID. With this option: - Reordering file diffs that make up a patch does not affect the ID. @@ -45,14 +51,16 @@ OPTIONS of "-O", thereby making existing databases storing such "unstable" or historical patch-ids unusable. + - All whitespace within the patch is ignored and does not affect the id. + This is the default if patchid.stable is set to true. --unstable:: Use an "unstable" hash as the patch ID. With this option, the result produced is compatible with the patch-id value produced - by git 1.9 and older. Users with pre-existing databases storing - patch-ids produced by git 1.9 and older (who do not deal with reordered - patches) may want to use this option. + by git 1.9 and older and whitespace is ignored. Users with pre-existing + databases storing patch-ids produced by git 1.9 and older (who do not deal + with reordered patches) may want to use this option. This is the default. diff --git a/builtin/patch-id.c b/builtin/patch-id.c index e7a3112314..afdd472369 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -2,6 +2,7 @@ #include "builtin.h" #include "config.h" #include "diff.h" +#include "parse-options.h" static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result) { @@ -57,7 +58,7 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after) } static int get_one_patchid(struct object_id *next_oid, struct object_id *result, - struct strbuf *line_buf, int stable) + struct strbuf *line_buf, int stable, int verbatim) { int patchlen = 0, found_next = 0; int before = -1, after = -1; @@ -76,8 +77,11 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, if (!skip_prefix(line, "diff-tree ", &p) && !skip_prefix(line, "commit ", &p) && !skip_prefix(line, "From ", &p) && - starts_with(line, "\\ ") && 12 < strlen(line)) + starts_with(line, "\\ ") && 12 < strlen(line)) { + if (verbatim) + the_hash_algo->update_fn(&ctx, line, strlen(line)); continue; + } if (!get_oid_hex(p, next_oid)) { found_next = 1; @@ -152,8 +156,8 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, if (line[0] == '+' || line[0] == ' ') after--; - /* Compute the sha without whitespace */ - len = remove_space(line); + /* Add line to hash algo (possibly removing whitespace) */ + len = verbatim ? strlen(line) : remove_space(line); patchlen += len; the_hash_algo->update_fn(&ctx, line, len); } @@ -166,7 +170,7 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result, return patchlen; } -static void generate_id_list(int stable) +static void generate_id_list(int stable, int verbatim) { struct object_id oid, n, result; int patchlen; @@ -174,21 +178,32 @@ static void generate_id_list(int stable) oidclr(&oid); while (!feof(stdin)) { - patchlen = get_one_patchid(&n, &result, &line_buf, stable); + patchlen = get_one_patchid(&n, &result, &line_buf, stable, verbatim); flush_current_id(patchlen, &oid, &result); oidcpy(&oid, &n); } strbuf_release(&line_buf); } -static const char patch_id_usage[] = "git patch-id [--stable | --unstable]"; +static const char *const patch_id_usage[] = { + N_("git patch-id [--stable | --unstable | --verbatim]"), NULL +}; + +struct patch_id_opts { + int stable; + int verbatim; +}; static int git_patch_id_config(const char *var, const char *value, void *cb) { - int *stable = cb; + struct patch_id_opts *opts = cb; if (!strcmp(var, "patchid.stable")) { - *stable = git_config_bool(var, value); + opts->stable = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "patchid.verbatim")) { + opts->verbatim = git_config_bool(var, value); return 0; } @@ -197,21 +212,29 @@ static int git_patch_id_config(const char *var, const char *value, void *cb) int cmd_patch_id(int argc, const char **argv, const char *prefix) { - int stable = -1; - - git_config(git_patch_id_config, &stable); - - /* If nothing is set, default to unstable. */ - if (stable < 0) - stable = 0; - - if (argc == 2 && !strcmp(argv[1], "--stable")) - stable = 1; - else if (argc == 2 && !strcmp(argv[1], "--unstable")) - stable = 0; - else if (argc != 1) - usage(patch_id_usage); - - generate_id_list(stable); + /* if nothing is set, default to unstable */ + struct patch_id_opts config = {0, 0}; + int opts = 0; + struct option builtin_patch_id_options[] = { + OPT_CMDMODE(0, "unstable", &opts, + N_("use the unstable patch-id algorithm"), 1), + OPT_CMDMODE(0, "stable", &opts, + N_("use the stable patch-id algorithm"), 2), + OPT_CMDMODE(0, "verbatim", &opts, + N_("don't strip whitespace from the patch"), 3), + OPT_END() + }; + + git_config(git_patch_id_config, &config); + + /* verbatim implies stable */ + if (config.verbatim) + config.stable = 1; + + argc = parse_options(argc, argv, prefix, builtin_patch_id_options, + patch_id_usage, 0); + + generate_id_list(opts ? opts > 1 : config.stable, + opts ? opts == 3 : config.verbatim); return 0; } diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh index cdc5191aa8..a7fa94ce0a 100755 --- a/t/t4204-patch-id.sh +++ b/t/t4204-patch-id.sh @@ -8,13 +8,13 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . ./test-lib.sh test_expect_success 'setup' ' - as="a a a a a a a a" && # eight a - test_write_lines $as >foo && - test_write_lines $as >bar && + str="ab cd ef gh ij kl mn op" && + test_write_lines $str >foo && + test_write_lines $str >bar && git add foo bar && git commit -a -m initial && - test_write_lines $as b >foo && - test_write_lines $as b >bar && + test_write_lines $str b >foo && + test_write_lines $str b >bar && git commit -a -m first && git checkout -b same main && git commit --amend -m same-msg && @@ -22,8 +22,23 @@ test_expect_success 'setup' ' echo c >foo && echo c >bar && git commit --amend -a -m notsame-msg && + git checkout -b with_space main~ && + cat >foo <<-\EOF && + a b + c d + e f + g h + i j + k l + m n + op + EOF + cp foo bar && + git add foo bar && + git commit --amend -m "with spaces" && test_write_lines bar foo >bar-then-foo && test_write_lines foo bar >foo-then-bar + ' test_expect_success 'patch-id output is well-formed' ' @@ -128,9 +143,21 @@ test_patch_id_file_order () { git format-patch -1 --stdout -O foo-then-bar >format-patch.output && calc_patch_id top-diff.output && + calc_patch_id top-diff.output && + calc_patch_id