Git 2.45
[git/gitster.git] / t / t0028-working-tree-encoding.sh
blobad151a346708a5898eb5bdc536131baddafdf987
#!/bin/sh
#
# Exercises the "working-tree-encoding" gitattribute: files are kept in one
# encoding in the working tree and converted when added or checked out.

test_description='working-tree-encoding conversion via gitattributes'

# Tests below update refs/heads/main directly, so pin the initial branch name.
GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME

TEST_PASSES_SANITIZE_LEAK=true
# NOTE(review): presumably this is why a later test has to create .git/info
# itself -- confirm against test-lib.sh.
TEST_CREATE_REPO_NO_TEMPLATE=1
. ./test-lib.sh
. "$TEST_DIRECTORY/lib-encoding.sh"

# Several tests grep the trace output for the "Checking roundtrip encoding"
# message, so keep encoding tracing switched on for the whole script.
GIT_TRACE_WORKING_TREE_ENCODING=1
export GIT_TRACE_WORKING_TREE_ENCODING
# Build every fixture the later tests rely on (*.raw reference files in the
# various encodings) and create the initial commit.
test_expect_success 'setup test files' '
	git config core.eol lf &&

	printf "%s\n" \
		"*.utf16 text working-tree-encoding=utf-16" \
		"*.utf16lebom text working-tree-encoding=UTF-16LE-BOM" \
		>.gitattributes &&

	# The same sample text in each encoding under test
	sample="hallo there!\ncan you read me?" &&
	printf "$sample" >test.utf8.raw &&
	printf "$sample" | write_utf16 >test.utf16.raw &&
	printf "$sample" | write_utf32 >test.utf32.raw &&
	{
		# little-endian BOM followed by the UTF-16LE payload
		printf "\377\376" &&
		printf "$sample" | iconv -f UTF-8 -t UTF-16LE
	} >test.utf16lebom.raw &&

	# Line ending tests
	printf "one\ntwo\nthree\n" >lf.utf8.raw &&
	printf "one\r\ntwo\r\nthree\r\n" >crlf.utf8.raw &&

	# BOM tests: payloads without a BOM ...
	printf "\0a\0b\0c" >nobom.utf16be.raw &&
	printf "a\0b\0c\0" >nobom.utf16le.raw &&
	printf "\0\0\0a\0\0\0b\0\0\0c" >nobom.utf32be.raw &&
	printf "a\0\0\0b\0\0\0c\0\0\0" >nobom.utf32le.raw &&
	# ... and the same payloads with a BOM in front
	printf "\376\377\0a\0b\0c" >bebom.utf16be.raw &&
	printf "\377\376a\0b\0c\0" >lebom.utf16le.raw &&
	printf "\0\0\376\377\0\0\0a\0\0\0b\0\0\0c" >bebom.utf32be.raw &&
	printf "\377\376\0\0a\0\0\0b\0\0\0c\0\0\0" >lebom.utf32le.raw &&

	# Add only the UTF-16 files, the UTF-32 file is added by a later test
	for ext in utf16 utf32 utf16lebom
	do
		cp test.$ext.raw test.$ext || return 1
	done &&
	git add .gitattributes test.utf16 test.utf16lebom &&
	git commit -m initial
'
# The blob staged for the UTF-16 working tree file must equal the UTF-8
# reference bytes.
test_expect_success 'ensure UTF-8 is stored in Git' '
	test_when_finished "rm -f test.utf16.git" &&

	git cat-file blob :test.utf16 >test.utf16.git &&
	test_cmp_bin test.utf8.raw test.utf16.git
'
# Deleting the working tree file and checking it out again must reproduce
# the UTF-16 reference bytes. The reference file is removed afterwards.
test_expect_success 're-encode to UTF-16 on checkout' '
	test_when_finished "rm -f test.utf16.raw" &&

	rm test.utf16 &&
	git checkout -- test.utf16 &&
	test_cmp_bin test.utf16.raw test.utf16
'
# Same round trip as for plain UTF-16, but for the UTF-16LE-BOM encoding:
# the checked out file must match the BOM-prefixed little-endian reference.
test_expect_success 're-encode to UTF-16-LE-BOM on checkout' '
	rm test.utf16lebom &&
	git checkout -- test.utf16lebom &&
	test_cmp_bin test.utf16lebom.raw test.utf16lebom
'
# The encoding attribute must also be honoured when it comes from
# $GIT_DIR/info/attributes instead of a .gitattributes file.
#
# NOTE(review): .git/info/attributes is not removed when this test finishes.
# The UTF-32 round of the eol test further down never declares *.utf32 in
# .gitattributes, so it appears to rely on this entry staying in place.
test_expect_success 'check $GIT_DIR/info/attributes support' '
	test_when_finished "rm -f test.utf32.git" &&
	test_when_finished "git reset --hard HEAD" &&

	mkdir .git/info &&
	echo "*.utf32 text working-tree-encoding=utf-32" >.git/info/attributes &&

	git add test.utf32 &&
	git cat-file blob :test.utf32 >test.utf32.git &&
	test_cmp_bin test.utf8.raw test.utf32.git
'
# Run the BOM and line-ending checks once for UTF-16 and once for UTF-32.
# ${i} is expanded when each test body is evaluated, i.e. while the loop
# variable still holds the width of the current round.
for i in 16 32
do
	test_expect_success "check prohibited UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i}be text working-tree-encoding=utf-${i}be" >>.gitattributes &&
		echo "*.utf${i}le text working-tree-encoding=utf-${i}LE" >>.gitattributes &&

		# Here we add UTF-16 (resp. UTF-32) files with a BOM (big and
		# little endian) but tell Git to treat them as UTF-16BE/UTF-16LE
		# (resp. UTF-32BE/UTF-32LE). In these cases the BOM is prohibited.
		for ext in be le
		do
			# The error message repeats the attribute value verbatim.
			case $ext in
			be) enc=be ;;
			le) enc=LE ;;
			esac

			for bom in be le
			do
				cp ${bom}bom.utf${i}${bom}.raw ${bom}bom.utf${i}${ext} &&
				test_must_fail git add ${bom}bom.utf${i}${ext} 2>err.out &&
				test_grep "fatal: BOM is prohibited .* utf-${i}${enc}" err.out &&
				test_grep "use UTF-${i} as working-tree-encoding" err.out ||
				return 1
			done
		done
	'

	test_expect_success "check required UTF-${i} BOM" '
		test_when_finished "git reset --hard HEAD" &&

		echo "*.utf${i} text working-tree-encoding=utf-${i}" >>.gitattributes &&

		# Without a BOM neither byte order is acceptable for plain UTF-${i}.
		for endian in be le
		do
			cp nobom.utf${i}${endian}.raw nobom.utf${i} &&
			test_must_fail git add nobom.utf${i} 2>err.out &&
			test_grep "fatal: BOM is required .* utf-${i}" err.out &&
			test_grep "use UTF-${i}BE or UTF-${i}LE" err.out ||
			return 1
		done
	'

	test_expect_success "eol conversion for UTF-${i} encoded files on checkout" '
		test_when_finished "rm -f crlf.utf${i}.raw lf.utf${i}.raw" &&
		test_when_finished "git reset --hard HEAD^" &&

		write_utf${i} <lf.utf8.raw >lf.utf${i}.raw &&
		write_utf${i} <crlf.utf8.raw >crlf.utf${i}.raw &&
		cp crlf.utf${i}.raw eol.utf${i} &&

		# Expected "git ls-files --eol" line; the padding follows that
		# command and the path is separated from the columns by a tab.
		printf "i/lf    w/-text attr/text             \teol.utf${i}\n" >expectIndexLF &&

		git add eol.utf${i} &&
		git commit -m eol &&

		# First check out with CRLF (Windows line endings), then with LF
		# (Unix line endings). Whatever the working tree holds, the
		# index must report LF both times.
		for eol in crlf lf
		do
			rm eol.utf${i} &&
			git -c core.eol=$eol checkout eol.utf${i} &&
			test_cmp_bin $eol.utf${i}.raw eol.utf${i} &&
			git ls-files --eol eol.utf${i} >actual &&
			test_cmp expectIndexLF actual ||
			return 1
		done
	'
done
# Walk through the attribute states: set without a value (rejected), unset
# and empty (both accepted), and an unknown encoding name (rejected).
# .gitattributes is overwritten for each case and restored by the reset.
test_expect_success 'check unsupported encodings' '
	test_when_finished "git reset --hard HEAD" &&

	# attribute set without a value
	echo "*.set text working-tree-encoding" >.gitattributes &&
	printf "set" >t.set &&
	test_must_fail git add t.set 2>err.out &&
	test_grep "true/false are no valid working-tree-encodings" err.out &&

	# attribute explicitly unset
	echo "*.unset text -working-tree-encoding" >.gitattributes &&
	printf "unset" >t.unset &&
	git add t.unset &&

	# attribute set to the empty string
	echo "*.empty text working-tree-encoding=" >.gitattributes &&
	printf "empty" >t.empty &&
	git add t.empty &&

	# attribute naming an encoding that does not exist
	echo "*.garbage text working-tree-encoding=garbage" >.gitattributes &&
	printf "garbage" >t.garbage &&
	test_must_fail git add t.garbage 2>err.out &&
	test_grep "failed to encode" err.out
'
# A blob that was stored without going through the working-tree-encoding
# filter must make a later checkout fail instead of being silently rewritten.
test_expect_success 'error if encoding round trip is not the same during refresh' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Add and commit a file with a working-tree-encoding attribute but
	# skip the filter (--no-filters), so the blob holds the working tree
	# bytes verbatim. This simulates a Git version that has no working
	# tree encoding support.
	echo "*.utf16le text working-tree-encoding=utf-16le" >.gitattributes &&
	echo "hallo" >nonsense.utf16le &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16le) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16le &&

	# Run each git command as its own step of the && chain: nested as
	# arguments of commit-tree, a failing write-tree or rev-parse would
	# go unnoticed. HEAD has not moved, so BEFORE_STATE is the parent.
	TREE=$(git write-tree) &&
	COMMIT=$(git commit-tree -p $BEFORE_STATE -m "plain commit" $TREE) &&
	git update-ref refs/heads/main $COMMIT &&

	test_must_fail git checkout HEAD^ 2>err.out &&
	test_grep "error: .* overwritten by checkout:" err.out
'
# A BOM-less blob committed for a path declared as UTF-16 must be reported
# as an error by "git diff", which itself still exits successfully.
test_expect_success 'error if encoding garbage is already in Git' '
	BEFORE_STATE=$(git rev-parse HEAD) &&
	test_when_finished "git reset --hard $BEFORE_STATE" &&

	# Skip the UTF-16 filter for the added file.
	# This simulates a Git version that has no working-tree-encoding support.
	cp nobom.utf16be.raw nonsense.utf16 &&
	TEST_HASH=$(git hash-object --no-filters -w nonsense.utf16) &&
	git update-index --add --cacheinfo 100644 $TEST_HASH nonsense.utf16 &&

	# Run each git command as its own step of the && chain: nested as
	# arguments of commit-tree, a failing write-tree or rev-parse would
	# go unnoticed. HEAD has not moved, so BEFORE_STATE is the parent.
	TREE=$(git write-tree) &&
	COMMIT=$(git commit-tree -p $BEFORE_STATE -m "plain commit" $TREE) &&
	git update-ref refs/heads/main $COMMIT &&

	git diff 2>err.out &&
	test_grep "error: BOM is required" err.out
'
# Prerequisite: the local iconv can convert from UTF-8 to SHIFT-JIS.
test_lazy_prereq ICONV_SHIFT_JIS '
	iconv -f UTF-8 -t SHIFT-JIS </dev/null
'

# Checks which encodings get the extra round-trip verification on "git add",
# by looking for the "Checking roundtrip encoding" message in the trace.
# Each git command is piped straight into grep, so only the presence of the
# trace message is checked, not the exit status of git itself.
test_expect_success ICONV_SHIFT_JIS 'check roundtrip encoding' '
	test_when_finished "rm -f roundtrip.shift roundtrip.utf16" &&
	test_when_finished "git reset --hard HEAD" &&

	text="hallo there!\nroundtrip test here!" &&
	printf "$text" | iconv -f UTF-8 -t SHIFT-JIS >roundtrip.shift &&
	printf "$text" | write_utf16 >roundtrip.utf16 &&
	echo "*.shift text working-tree-encoding=SHIFT-JIS" >>.gitattributes &&

	# SHIFT-JIS encoded files are round-trip checked by default...
	GIT_TRACE=1 git add .gitattributes roundtrip.shift 2>&1 |
		grep "Checking roundtrip encoding for SHIFT-JIS" &&
	git reset &&

	# ... unless we overwrite the Git config!
	! GIT_TRACE=1 git -c core.checkRoundtripEncoding=garbage \
		add .gitattributes roundtrip.shift 2>&1 |
		grep "Checking roundtrip encoding for SHIFT-JIS" &&
	git reset &&

	# UTF-16 encoded files should not be round-trip checked by default...
	# The positive checks below show that the trace spells the encoding
	# in lower case ("utf-16"). Match case-insensitively, otherwise this
	# negative check could never fail.
	! GIT_TRACE=1 git add roundtrip.utf16 2>&1 |
		grep -i "Checking roundtrip encoding for utf-16" &&
	git reset &&

	# ... unless we tell Git to check it!
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-16, UTF-32" \
		add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset &&

	# The same must hold when the configured list is in a different order
	# (here we also check that the casing of the encoding is irrelevant)
	GIT_TRACE=1 git -c core.checkRoundtripEncoding="UTF-32, utf-16" \
		add roundtrip.utf16 2>&1 |
		grep "Checking roundtrip encoding for utf-16" &&
	git reset
'
# Commit a string as UTF-8, check the file out again in another encoding and
# compare the result byte for byte.
#
# $1: checkout encoding
# $2: test string (used as a printf format)
# $3: binary test string in checkout encoding (printf format, octal escapes)
#
# The arguments are stored in global variables because the test body is
# single-quoted and only expanded when test_expect_success evaluates it.
test_commit_utf8_checkout_other () {
	encoding="$1"
	orig_string="$2"
	expect_bytes="$3"

	test_expect_success "Commit UTF-8, checkout $encoding" '
		test_when_finished "git checkout HEAD -- .gitattributes" &&

		test_ext="commit_utf8_checkout_$encoding" &&
		test_file="test.$test_ext" &&

		# Commit as UTF-8
		echo "*.$test_ext text working-tree-encoding=UTF-8" >.gitattributes &&
		printf "$orig_string" >"$test_file" &&
		git add "$test_file" &&
		git commit -m "Test data" &&

		# Checkout in tested encoding
		rm "$test_file" &&
		echo "*.$test_ext text working-tree-encoding=$encoding" >.gitattributes &&
		git checkout HEAD -- "$test_file" &&

		# Test. The expansions are quoted so that expected bytes or
		# file names containing whitespace are not split into words.
		printf "$expect_bytes" >"$test_file.raw" &&
		test_cmp_bin "$test_file.raw" "$test_file"
	'
}
# Every case commits the same UTF-8 text and lists, as octal escapes, the
# bytes expected in the working tree for the given checkout encoding.
utf8_text="Test Тест"

test_commit_utf8_checkout_other "UTF-8" "$utf8_text" \
	"\124\145\163\164\040\320\242\320\265\321\201\321\202"
test_commit_utf8_checkout_other "UTF-16LE" "$utf8_text" \
	"\124\000\145\000\163\000\164\000\040\000\042\004\065\004\101\004\102\004"
test_commit_utf8_checkout_other "UTF-16BE" "$utf8_text" \
	"\000\124\000\145\000\163\000\164\000\040\004\042\004\065\004\101\004\102"
test_commit_utf8_checkout_other "UTF-16LE-BOM" "$utf8_text" \
	"\377\376\124\000\145\000\163\000\164\000\040\000\042\004\065\004\101\004\102\004"
test_commit_utf8_checkout_other "UTF-16BE-BOM" "$utf8_text" \
	"\376\377\000\124\000\145\000\163\000\164\000\040\004\042\004\065\004\101\004\102"
test_commit_utf8_checkout_other "UTF-32LE" "$utf8_text" \
	"\124\000\000\000\145\000\000\000\163\000\000\000\164\000\000\000\040\000\000\000\042\004\000\000\065\004\000\000\101\004\000\000\102\004\000\000"
test_commit_utf8_checkout_other "UTF-32BE" "$utf8_text" \
	"\000\000\000\124\000\000\000\145\000\000\000\163\000\000\000\164\000\000\000\040\000\000\004\042\000\000\004\065\000\000\004\101\000\000\004\102"

test_done