grep/pcre2: fix an edge case concerning ascii patterns and UTF-8 data
[git/debian.git] / t / t7812-grep-icase-non-ascii.sh
blob 22487d90fdc68e30b24144ac0899d60df0ea3fb0
#!/bin/sh
#
# Exercises case-insensitive "git grep" / "git log" matching on non-ASCII data.

test_description='grep icase on non-English locales'

# NOTE(review): presumably this provides the test helpers, the GETTEXT_LOCALE
# prerequisite and $is_IS_locale used by the tests in this script -- confirm.
. ./lib-gettext.sh
# Write and stage a one-line file with non-ASCII text, then export LC_ALL
# set to the value of $is_IS_locale.
test_expect_success GETTEXT_LOCALE 'setup' '
	test_write_lines "TILRAUN: Halló Heimur!" >file &&
	git add file &&
	LC_ALL="$is_IS_locale" &&
	export LC_ALL
'
# Set the REGEX_LOCALE prerequisite only if GETTEXT_LOCALE is satisfied and
# the "test-tool regex" probe with the ICASE flag succeeds on the
# "HALLÓ" / "Halló" pair.
test_have_prereq GETTEXT_LOCALE &&
test-tool regex "HALLÓ" "Halló" ICASE &&
test_set_prereq REGEX_LOCALE
# With -i and no -F, both the original spelling and an all-uppercase
# spelling of the line must be found.
test_expect_success REGEX_LOCALE 'grep literal string, no -F' '
	git grep -i "TILRAUN: Halló Heimur!" &&
	git grep -i "TILRAUN: HALLÓ HEIMUR!"
'
# --perl-regexp with a "." wildcard in the pattern: case-sensitive,
# then -i with the same pattern, then -i with an uppercased pattern.
test_expect_success GETTEXT_LOCALE,PCRE 'grep pcre utf-8 icase' '
	git grep --perl-regexp    "TILRAUN: H.lló Heimur!" &&
	git grep --perl-regexp -i "TILRAUN: H.lló Heimur!" &&
	git grep --perl-regexp -i "TILRAUN: H.LLÓ HEIMUR!"
'
# Add file2 whose line has a doubled "ó"; the pattern "ó+" is expected to
# list both "file" and "file2" (compared against the -l output).
test_expect_success GETTEXT_LOCALE,PCRE 'grep pcre utf-8 string with "+"' '
	test_write_lines "TILRAUN: Hallóó Heimur!" >file2 &&
	git add file2 &&
	git grep -l --perl-regexp "TILRAUN: H.lló+ Heimur!" >actual &&
	echo file >expected &&
	echo file2 >>expected &&
	test_cmp expected actual
'
# Same two searches as the non -F literal test, this time with -F.
test_expect_success REGEX_LOCALE 'grep literal string, with -F' '
	git grep -i -F "TILRAUN: Halló Heimur!" &&
	git grep -i -F "TILRAUN: HALLÓ HEIMUR!"
'
# Stage file3 whose line contains "[abc]" and search for that exact text
# with -i -F, restricted to file3.
test_expect_success REGEX_LOCALE 'grep string with regex, with -F' '
	test_write_lines "TILRAUN: Halló Heimur [abc]!" >file3 &&
	git add file3 &&
	git grep -i -F "TILRAUN: Halló Heimur [abc]!" file3
'
# Commit what is staged as "first", then check that "git log -i -S" with an
# uppercased needle prints exactly that commit (via --format=%f).
test_expect_success REGEX_LOCALE 'pickaxe -i on non-ascii' '
	git commit -m first &&
	git log --format=%f -i -S"TILRAUN: HALLÓ HEIMUR!" >actual &&
	echo first >expected &&
	test_cmp expected actual
'
# Commit file4 with a non-ASCII author name, then check which part of the
# "Author" line is colored when --author=".*Thor" is used with --perl-regexp.
# The here-document lines are tab-indented because "<<-" strips leading tabs.
test_expect_success GETTEXT_LOCALE,PCRE 'log --author with an ascii pattern on UTF-8 data' '
	cat >expected <<-\EOF &&
	Author: <BOLD;RED>À Ú Thor<RESET> <author@example.com>
	EOF
	test_write_lines "forth" >file4 &&
	git add file4 &&
	git commit --author="À Ú Thor <author@example.com>" -m sécond &&
	git log -1 --color=always --perl-regexp --author=".*Thor" >log &&
	grep Author log >actual.raw &&
	test_decode_color <actual.raw >actual &&
	test_cmp expected actual
'
# Commit file5 with i18n.commitEncoding=latin1 and a non-ASCII committer
# name, then check the colored span of the "Commit:" line when
# --committer=" O.*" is used with --perl-regexp and latin1 log output.
#
# The expected line keeps the column padding that --pretty=fuller puts after
# "Commit:" (five spaces); with a single space test_cmp could not match.
test_expect_success GETTEXT_LOCALE,PCRE 'log --committer with an ascii pattern on ISO-8859-1 data' '
	cat >expected <<-\EOF &&
	Commit:     Ç<BOLD;RED> O Mîtter <committer@example.com><RESET>
	EOF
	test_write_lines "fifth" >file5 &&
	git add file5 &&
	GIT_COMMITTER_NAME="Ç O Mîtter" &&
	GIT_COMMITTER_EMAIL="committer@example.com" &&
	git -c i18n.commitEncoding=latin1 commit -m thïrd &&
	git -c i18n.logOutputEncoding=latin1 log -1 --pretty=fuller --color=always --perl-regexp --committer=" O.*" >log &&
	grep Commit: log >actual.raw &&
	test_decode_color <actual.raw >actual &&
	test_cmp expected actual
'
# Check which part of the message line is colored for --grep="con" with
# --perl-regexp on the most recent commit matching that pattern.
#
# The expected line carries the four-space indent that "git log" puts in
# front of commit message lines; "<<-" strips only the leading tab.
test_expect_success GETTEXT_LOCALE,PCRE 'log --grep with an ascii pattern on UTF-8 data' '
	cat >expected <<-\EOF &&
	    sé<BOLD;RED>con<RESET>d
	EOF
	git log -1 --color=always --perl-regexp --grep="con" >log &&
	grep con log >actual.raw &&
	test_decode_color <actual.raw >actual &&
	test_cmp expected actual
'
# Same kind of check as the UTF-8 --grep test, but with latin1 log output and
# the pattern "th.*rd"; the whole message word is expected to be colored.
#
# The expected line carries the four-space indent that "git log" puts in
# front of commit message lines; "<<-" strips only the leading tab.
test_expect_success GETTEXT_LOCALE,PCRE 'log --grep with an ascii pattern on ISO-8859-1 data' '
	cat >expected <<-\EOF &&
	    <BOLD;RED>thïrd<RESET>
	EOF
	git -c i18n.logOutputEncoding=latin1 log -1 --color=always --perl-regexp --grep="th.*rd" >log &&
	grep "th.*rd" log >actual.raw &&
	test_decode_color <actual.raw >actual &&
	test_cmp expected actual
'
# Create and stage two fixtures:
#   invalid-0x80: a line holding the single byte 0x80 (octal 200), followed
#                 by the line "ævar" (which is also saved as "expected").
#   invalid-0xe5: a single line starting with byte 0xe5 (octal 345),
#                 followed by "Aæ".
test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: setup invalid UTF-8 data' '
	printf "\\200\\n" >invalid-0x80 &&
	echo "ævar" >expected &&
	cat expected >>invalid-0x80 &&
	git add invalid-0x80 &&

	# Test for PCRE2_MATCH_INVALID_UTF bug
	# https://bugs.exim.org/show_bug.cgi?id=2642
	printf "\\345Aæ\\n" >invalid-0xe5 &&
	git add invalid-0xe5
'
# An ASCII-only pattern must find the "ævar" line in invalid-0x80, both
# as-is and with the "(*NO_JIT)" prefix on the pattern.
test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep ASCII from invalid UTF-8 data' '
	git grep -h "var" invalid-0x80 >actual &&
	test_cmp expected actual &&
	git grep -h "(*NO_JIT)var" invalid-0x80 >actual &&
	test_cmp expected actual
'
# Searching invalid-0xe5 for "Aæ" must print the file's whole content, both
# as-is and with the "(*NO_JIT)" prefix on the pattern.
test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep ASCII from invalid UTF-8 data (PCRE2 bug #2642)' '
	git grep -h "Aæ" invalid-0xe5 >actual &&
	test_cmp invalid-0xe5 actual &&
	git grep -h "(*NO_JIT)Aæ" invalid-0xe5 >actual &&
	test_cmp invalid-0xe5 actual
'
# A non-ASCII pattern ("æ") must find the "ævar" line in invalid-0x80, both
# as-is and with the "(*NO_JIT)" prefix on the pattern.
test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep non-ASCII from invalid UTF-8 data' '
	git grep -h "æ" invalid-0x80 >actual &&
	test_cmp expected actual &&
	git grep -h "(*NO_JIT)æ" invalid-0x80 >actual &&
	test_cmp expected actual
'
# Searching invalid-0xe5 for "Aæ" must print the file's whole content, both
# as-is and with the "(*NO_JIT)" prefix on the pattern.
# NOTE(review): the commands are the same as in the "grep ASCII ... (PCRE2
# bug #2642)" test; only the title differs -- confirm this is intended.
test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep non-ASCII from invalid UTF-8 data (PCRE2 bug #2642)' '
	git grep -h "Aæ" invalid-0xe5 >actual &&
	test_cmp invalid-0xe5 actual &&
	git grep -h "(*NO_JIT)Aæ" invalid-0xe5 >actual &&
	test_cmp invalid-0xe5 actual
'
# Lazily-evaluated prerequisite: satisfied when
# "test-tool pcre2-config has-PCRE2_MATCH_INVALID_UTF" exits successfully.
test_lazy_prereq PCRE2_MATCH_INVALID_UTF '
	test-tool pcre2-config has-PCRE2_MATCH_INVALID_UTF
'
# This variant does not require PCRE2_MATCH_INVALID_UTF: both -i searches are
# wrapped in test_might_fail and their output is captured but not compared.
test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: grep non-ASCII from invalid UTF-8 data with -i' '
	test_might_fail git grep -hi "Æ" invalid-0x80 >actual &&
	test_might_fail git grep -hi "(*NO_JIT)Æ" invalid-0x80 >actual
'
# With the PCRE2_MATCH_INVALID_UTF prerequisite, an uppercase "Æ" searched
# with -i must find the "ævar" line in invalid-0x80, both as-is and with
# the "(*NO_JIT)" prefix on the pattern.
test_expect_success GETTEXT_LOCALE,LIBPCRE2,PCRE2_MATCH_INVALID_UTF 'PCRE v2: grep non-ASCII from invalid UTF-8 data with -i' '
	git grep -hi "Æ" invalid-0x80 >actual &&
	test_cmp expected actual &&
	git grep -hi "(*NO_JIT)Æ" invalid-0x80 >actual &&
	test_cmp expected actual
'
# With the PCRE2_MATCH_INVALID_UTF prerequisite, -i searches of invalid-0xe5
# must print the file's whole content: first for "Æ" alone, then for "aÆ"
# (lowercase ASCII letter in the pattern), each also with "(*NO_JIT)".
test_expect_success GETTEXT_LOCALE,LIBPCRE2,PCRE2_MATCH_INVALID_UTF 'PCRE v2: grep non-ASCII from invalid UTF-8 data with -i (PCRE2 bug #2642)' '
	git grep -hi "Æ" invalid-0xe5 >actual &&
	test_cmp invalid-0xe5 actual &&
	git grep -hi "(*NO_JIT)Æ" invalid-0xe5 >actual &&
	test_cmp invalid-0xe5 actual &&

	# Only the case of grepping the ASCII part in a way that
	# relies on -i fails
	git grep -hi "aÆ" invalid-0xe5 >actual &&
	test_cmp invalid-0xe5 actual &&
	git grep -hi "(*NO_JIT)aÆ" invalid-0xe5 >actual &&
	test_cmp invalid-0xe5 actual
'
# End of the test script.
test_done