tests: remove ugly /bin/sh wrapper around each perl-based test script
[coreutils.git] / tests / misc / sort
blobf26bc5fbcb077bcbcbd9f1ebdcae0b29c2ebb9cc
1 #!/usr/bin/perl
3 # Copyright (C) 2008 Free Software Foundation, Inc.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 use strict;
# Name of the program under test; it is interpolated into the expected
# diagnostics in the test table.
20 my $prog = 'sort';
22 # Turn off localization of executable's output.
23 @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
25 # Since each test is run with a file name and with redirected stdin,
26 # the name in the diagnostic is either the file name or "-".
27 # Normalize each diagnostic to use '-'.
# The substitution is single-quoted, so $prog is NOT interpolated here;
# presumably the test driver evaluates the string later, with $prog in
# scope -- TODO confirm against the driver.
28 my $normalize_filename = {ERR_SUBST => 's/^$prog: .*?:/$prog: -:/'};
# Expected diagnostic for the tests that name the file "no-file".
30 my $no_file = "$prog: open failed: no-file: No such file or directory\n";
32 my @Tests =
34 ["n1", '-n', {IN=>".01\n0\n"}, {OUT=>"0\n.01\n"}],
35 ["n2", '-n', {IN=>".02\n.01\n"}, {OUT=>".01\n.02\n"}],
36 ["n3", '-n', {IN=>".02\n.00\n"}, {OUT=>".00\n.02\n"}],
37 ["n4", '-n', {IN=>".02\n.000\n"}, {OUT=>".000\n.02\n"}],
38 ["n5", '-n', {IN=>".021\n.029\n"}, {OUT=>".021\n.029\n"}],
40 ["n6", '-n', {IN=>".02\n.0*\n"}, {OUT=>".0*\n.02\n"}],
41 ["n7", '-n', {IN=>".02\n.*\n"}, {OUT=>".*\n.02\n"}],
42 ["n8a", '-s -n -k1,1', {IN=>".0a\n.0b\n"}, {OUT=>".0a\n.0b\n"}],
43 ["n8b", '-s -n -k1,1', {IN=>".0b\n.0a\n"}, {OUT=>".0b\n.0a\n"}],
44 ["n9a", '-s -n -k1,1', {IN=>".000a\n.000b\n"}, {OUT=>".000a\n.000b\n"}],
45 ["n9b", '-s -n -k1,1', {IN=>".000b\n.000a\n"}, {OUT=>".000b\n.000a\n"}],
46 ["n10a", '-s -n -k1,1', {IN=>".00a\n.000b\n"}, {OUT=>".00a\n.000b\n"}],
47 ["n10b", '-s -n -k1,1', {IN=>".00b\n.000a\n"}, {OUT=>".00b\n.000a\n"}],
48 ["n11a", '-s -n -k1,1', {IN=>".01a\n.010\n"}, {OUT=>".01a\n.010\n"}],
49 ["n11b", '-s -n -k1,1', {IN=>".010\n.01a\n"}, {OUT=>".010\n.01a\n"}],
51 ["01a", '', {IN=>"A\nB\nC\n"}, {OUT=>"A\nB\nC\n"}],
53 ["02a", '-c', {IN=>"A\nB\nC\n"}, {OUT=>''}],
54 ["02b", '-c', {IN=>"A\nC\nB\n"}, {OUT=>''}, {EXIT=>1},
55 {ERR=>"$prog: -:3: disorder: B\n"}, $normalize_filename],
56 ["02c", '-c -k1,1', {IN=>"a\na b\n"}, {OUT=>''}],
57 ["02d", '-C', {IN=>"A\nB\nC\n"}, {OUT=>''}],
58 ["02e", '-C', {IN=>"A\nC\nB\n"}, {OUT=>''}, {EXIT=>1}],
59 # This should fail because there are duplicate keys
60 ["02m", '-cu', {IN=>"A\nA\n"}, {OUT=>''}, {EXIT=>1},
61 {ERR=>"$prog: -:2: disorder: A\n"}, $normalize_filename],
62 ["02n", '-cu', {IN=>"A\nB\n"}, {OUT=>''}],
63 ["02o", '-cu', {IN=>"A\nB\nB\n"}, {OUT=>''}, {EXIT=>1},
64 {ERR=>"$prog: -:3: disorder: B\n"}, $normalize_filename],
65 ["02p", '-cu', {IN=>"B\nA\nB\n"}, {OUT=>''}, {EXIT=>1},
66 {ERR=>"$prog: -:2: disorder: A\n"}, $normalize_filename],
68 ["03a", '-k1', {IN=>"B\nA\n"}, {OUT=>"A\nB\n"}],
69 ["03b", '-k1,1', {IN=>"B\nA\n"}, {OUT=>"A\nB\n"}],
70 ["03c", '-k1 -k2', {IN=>"A b\nA a\n"}, {OUT=>"A a\nA b\n"}],
71 # Fail with a diagnostic when -k specifies field == 0.
72 ["03d", '-k0', {EXIT=>2},
73 {ERR=>"$prog: -: invalid field specification `0'\n"},
74 $normalize_filename],
75 # Fail with a diagnostic when -k specifies character == 0.
76 ["03e", '-k1.0', {EXIT=>2},
77 {ERR=>"$prog: character offset is zero: invalid field specification `1.0'\n"}],
78 ["03f", '-k1.1,-k0', {EXIT=>2},
79 {ERR=>"$prog: invalid number after `,': invalid count at start of `-k0'\n"}],
80 # This is ok.
81 ["03g", '-k1.1,1.0', {IN=>''}],
82 # This is equivalent to 03g.
83 ["03h", '-k1.1,1', {IN=>''}],
84 # This, too, is equivalent to 03g.
85 ["03i", '-k1,1', {IN=>''}],
87 ["04a", '-nc', {IN=>"2\n11\n"}],
88 ["04b", '-n', {IN=>"11\n2\n"}, {OUT=>"2\n11\n"}],
89 ["04c", '-k1n', {IN=>"11\n2\n"}, {OUT=>"2\n11\n"}],
90 ["04d", '-k1', {IN=>"11\n2\n"}, {OUT=>"11\n2\n"}],
91 ["04e", '-k2', {IN=>"ignored B\nz-ig A\n"}, {OUT=>"z-ig A\nignored B\n"}],
93 ["05a", '-k1,2', {IN=>"A B\nA A\n"}, {OUT=>"A A\nA B\n"}],
94 ["05b", '-k1,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
95 ["05c", '-k1 -k2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
96 ["05d", '-k2,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
97 ["05e", '-k2,2', {IN=>"A B Z\nA A A\n"}, {OUT=>"A A A\nA B Z\n"}],
98 ["05f", '-k2,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
100 ["06a", '-k 1,2', {IN=>"A B\nA A\n"}, {OUT=>"A A\nA B\n"}],
101 ["06b", '-k 1,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
102 ["06c", '-k 1 -k 2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
103 ["06d", '-k 2,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
104 ["06e", '-k 2,2', {IN=>"A B Z\nA A A\n"}, {OUT=>"A A A\nA B Z\n"}],
105 ["06f", '-k 2,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
107 ["07a", '-k 2,3', {IN=>"9 a b\n7 a a\n"}, {OUT=>"7 a a\n9 a b\n"}],
108 ["07b", '-k 2,3', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}],
109 ["07c", '-k 2,3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
110 ["07d", '+1 -3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
112 # report an error for `.' without following char spec
113 ["08a", '-k 2.,3', {EXIT=>2},
114 {ERR=>"$prog: invalid number after `.': invalid count at start of `,3'\n"}],
115 # report an error for `,' without following POS2
116 ["08b", '-k 2,', {EXIT=>2},
117 {ERR=>"$prog: invalid number after `,': invalid count at start of `'\n"}],
119 # Test new -g option.
120 ["09a", '-g', {IN=>"1e2\n2e1\n"}, {OUT=>"2e1\n1e2\n"}],
121 # Make sure -n works how we expect.
122 ["09b", '-n', {IN=>"1e2\n2e1\n"}, {OUT=>"1e2\n2e1\n"}],
123 ["09c", '-n', {IN=>"2e1\n1e2\n"}, {OUT=>"1e2\n2e1\n"}],
124 ["09d", '-k2g', {IN=>"a 1e2\nb 2e1\n"}, {OUT=>"b 2e1\na 1e2\n"}],
126 # Bug reported by Roger Peel <R.Peel@ee.surrey.ac.uk>
127 ["10a", '-t : -k 2.2,2.2', {IN=>":ba\n:ab\n"}, {OUT=>":ba\n:ab\n"}],
128 # Equivalent to above, but using obsolescent `+pos -pos' option syntax.
129 ["10b", '-t : +1.1 -1.2', {IN=>":ba\n:ab\n"}, {OUT=>":ba\n:ab\n"}],
131 # The same as the preceding two, but with input lines reversed.
132 ["10c", '-t : -k 2.2,2.2', {IN=>":ab\n:ba\n"}, {OUT=>":ba\n:ab\n"}],
133 # Equivalent to above, but using obsolescent `+pos -pos' option syntax.
134 ["10d", '-t : +1.1 -1.2', {IN=>":ab\n:ba\n"}, {OUT=>":ba\n:ab\n"}],
135 # Try without -t...
136 # But note that we have to count the delimiting space at the beginning
137 # of each field that has it.
138 ["10a0", '-k 2.3,2.3', {IN=>"z ba\nz ab\n"}, {OUT=>"z ba\nz ab\n"}],
139 ["10a1", '-k 1.2,1.2', {IN=>"ba\nab\n"}, {OUT=>"ba\nab\n"}],
140 ["10a2", '-b -k 2.2,2.2', {IN=>"z ba\nz ab\n"}, {OUT=>"z ba\nz ab\n"}],
142 # An even simpler example demonstrating the bug.
143 ["10e", '-k 1.2,1.2', {IN=>"ab\nba\n"}, {OUT=>"ba\nab\n"}],
145 # The way sort works on these inputs (10f and 10g) seems wrong to me.
146 # See http://git.sv.gnu.org/gitweb/?p=coreutils.git;a=commitdiff;h=3c467c0d223
147 # POSIX doesn't seem to say one way or the other, but that's the way all
148 # other sort implementations work.
149 ["10f", '-t : -k 1.3,1.3', {IN=>":ab\n:ba\n"}, {OUT=>":ba\n:ab\n"}],
150 ["10g", '-k 1.4,1.4', {IN=>"a ab\nb ba\n"}, {OUT=>"b ba\na ab\n"}],
152 # Exercise bug re using -b to skip trailing blanks.
153 ["11a", '-t: -k1,1b -k2,2', {IN=>"a\t:a\na :b\n"}, {OUT=>"a\t:a\na :b\n"}],
154 ["11b", '-t: -k1,1b -k2,2', {IN=>"a :b\na\t:a\n"}, {OUT=>"a\t:a\na :b\n"}],
155 ["11c", '-t: -k2,2b -k3,3', {IN=>"z:a\t:a\na :b\n"}, {OUT=>"z:a\t:a\na :b\n"}],
156 # Before 1.22m, the first key comparison reported equality.
157 # With 1.22m, they compare different: "a" sorts before "a\n",
158 # and the second key spec isn't even used.
159 ["11d", '-t: -k2,2b -k3,3', {IN=>"z:a :b\na\t:a\n"}, {OUT=>"a\t:a\nz:a :b\n"}],
161 # Exercise bug re comparing `-' and integers.
162 ["12a", '-n -t: +1', {IN=>"a:1\nb:-\n"}, {OUT=>"b:-\na:1\n"}],
163 ["12b", '-n -t: +1', {IN=>"b:-\na:1\n"}, {OUT=>"b:-\na:1\n"}],
164 # Try some other (e.g. `X') invalid character.
165 ["12c", '-n -t: +1', {IN=>"a:1\nb:X\n"}, {OUT=>"b:X\na:1\n"}],
166 ["12d", '-n -t: +1', {IN=>"b:X\na:1\n"}, {OUT=>"b:X\na:1\n"}],
167 # From Karl Heuer
168 ["13a", '+0.1n', {IN=>"axx\nb-1\n"}, {OUT=>"b-1\naxx\n"}],
169 ["13b", '+0.1n', {IN=>"b-1\naxx\n"}, {OUT=>"b-1\naxx\n"}],
171 # From Carl Johnson <carlj@cjlinux.home.org>
172 ["14a", '-d -u', {IN=>"mal\nmal-\nmala\n"}, {OUT=>"mal\nmala\n"}],
173 # Be sure to fix the (translate && ignore) case in keycompare.
174 ["14b", '-f -d -u', {IN=>"mal\nmal-\nmala\n"}, {OUT=>"mal\nmala\n"}],
176 # Experiment with -i.
177 ["15a", '-i -u', {IN=>"a\na\1\n"}, {OUT=>"a\n"}],
178 ["15b", '-i -u', {IN=>"a\n\1a\n"}, {OUT=>"a\n"}],
179 ["15c", '-i -u', {IN=>"a\1\na\n"}, {OUT=>"a\1\n"}],
180 ["15d", '-i -u', {IN=>"\1a\na\n"}, {OUT=>"\1a\n"}],
181 ["15e", '-i -u', {IN=>"a\n\1\1\1\1\1a\1\1\1\1\n"}, {OUT=>"a\n"}],
183 # From Erick Branderhorst -- fixed around 1.19e
184 ["16a", '-f',
185 {IN=>"éminence\nüberhaupt\n's-Gravenhage\naëroclub\nAag\naagtappels\n"},
186 {OUT=>"'s-Gravenhage\nAag\naagtappels\naëroclub\néminence\nüberhaupt\n"}],
188 # This provokes a one-byte memory overrun of a malloc'd block for versions
189 # of sort from textutils-1.19p and before.
190 ["17", '-c', {IN=>"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n"}],
192 # POSIX says -n no longer implies -b, so here we're comparing ` 9' and `10'.
193 ["18a", '-k1.1,1.2n', {IN=>" 901\n100\n"}, {OUT=>" 901\n100\n"}],
195 # Just like above, because the global `-b' has no effect on the
196 # key specifier when a key-specific option (`n' in this case) is used.
197 ["18b", '-b -k1.1,1.2n', {IN=>" 901\n100\n"}, {OUT=>" 901\n100\n"}],
199 # Here we're comparing ` 90' and `10', because the `b' on the key-end specifier
200 # makes sort ignore leading blanks when determining that key's *end*.
201 ["18c", '-k1.1,1.2nb', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}],
203 # Here we're comparing `9' and `10', because the `b' on the key-start specifier
204 # makes sort ignore leading blanks when determining that key's *start*.
205 ["18d", '-k1.1b,1.2n', {IN=>" 901\n100\n"}, {OUT=>" 901\n100\n"}],
207 # This compares `90' and `10', as it ignores leading blanks for both
208 # key start and key end.
209 ["18e", '-nb -k1.1,1.2', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}],
211 # This looks odd, but works properly -- 2nd keyspec is never
212 # used because all lines are different.
213 ["19a", '+0 +1nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}],
215 # The test *intended* by the author of the above, but using the
216 # more-intuitive POSIX-style -k options.
217 ["19b", '-k1,1 -k2nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 3\nb 2\nb 1\n"}],
219 # This test failed when sort-1.22 was compiled on a Next x86 system
220 # without optimization. Without optimization gcc uses the buggy version
221 # of memcmp in the Next C library. With optimization, gcc uses its
222 # (working) builtin version. Test case from William Lewis.
223 ["20a", '',
224 {IN=>"_________U__free\n_________U__malloc\n_________U__abort\n_________U__memcpy\n_________U__memset\n_________U_dyld_stub_binding_helper\n_________U__malloc\n_________U___iob\n_________U__abort\n_________U__fprintf\n"},
225 {OUT=>"_________U___iob\n_________U__abort\n_________U__abort\n_________U__fprintf\n_________U__free\n_________U__malloc\n_________U__malloc\n_________U__memcpy\n_________U__memset\n_________U_dyld_stub_binding_helper\n"}],
227 # Demonstrate that folding changes the ordering of e.g. A, a, and _
228 # because while they normally (in the C locale) collate like A, _, a,
229 # when using -f, `a' is compared as if it were `A'.
230 ["21a", '', {IN=>"A\na\n_\n"}, {OUT=>"A\n_\na\n"}],
231 ["21b", '-f', {IN=>"A\na\n_\n"}, {OUT=>"A\na\n_\n"}],
232 ["21c", '-f', {IN=>"a\nA\n_\n"}, {OUT=>"A\na\n_\n"}],
233 ["21d", '-f', {IN=>"_\na\nA\n"}, {OUT=>"A\na\n_\n"}],
234 ["21e", '-f', {IN=>"a\n_\nA\n"}, {OUT=>"A\na\n_\n"}],
235 ["21f", '-fs', {IN=>"A\na\n_\n"}, {OUT=>"A\na\n_\n"}],
236 ["21g", '-fu', {IN=>"a\n_\n"}, {OUT=>"a\n_\n"}],
238 # This test failed until 1.22f. From Zvi Har'El.
239 ["22a", '-k 2,2fd -k 1,1r', {IN=>"3 b\n4 B\n"}, {OUT=>"4 B\n3 b\n"}],
240 ["22b", '-k 2,2d -k 1,1r', {IN=>"3 b\n4 b\n"}, {OUT=>"4 b\n3 b\n"}],
242 ["no-file1", 'no-file', {EXIT=>2}, {ERR=>$no_file}],
243 # This test failed until 1.22f. Sort didn't give an error.
244 # From Will Edgington.
245 ["o-no-file1", qw(-o no-file no-file), {EXIT=>2}, {ERR=>$no_file}],
247 ["create-empty", qw(-o no/such/file /dev/null), {EXIT=>2},
248 {ERR=>"$prog: open failed: no/such/file: No such file or directory\n"}],
250 # From Paul Eggert. This was fixed in textutils-1.22k.
251 ["neg-nls", '-n', {IN=>"-1\n-9\n"}, {OUT=>"-9\n-1\n"}],
253 # From Paul Eggert. This was fixed in textutils-1.22m.
254 # The bug was visible only when using the internationalized sorting code
255 # (i.e., not when configured with --disable-nls).
256 ["nul-nls", '', {IN=>"\0b\n\0a\n"}, {OUT=>"\0a\n\0b\n"}],
258 # Paul Eggert wrote:
259 # I tested the revised `sort' against Solaris `sort', and found a
260 # discrepancy that turns out to be a longstanding bug in GNU sort.
261 # POSIX.2 specifies that a newline is part of the input line, and should
262 # be significant during comparison; but with GNU sort the newline is
263 # insignificant. Here is an example of the bug:
265 # $ od -c t
266 # 0000000 \n \t \n
267 # 0000003
268 # $ sort t | od -c
269 # 0000000 \n \t \n
270 # 0000003
272 # The correct output of the latter command should be
274 # 0000000 \t \n \n
275 # 0000003
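277 # because \t comes before \n in the collating sequence, and the trailing
278 # \n's are part of the input line.
# NOTE(review): the expected OUT below is "\n\t\n", i.e. the order shown in
# the `sort t | od -c' example rather than the "correct" order described
# above -- confirm which behavior this test is meant to pin.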
279 ["use-nl", '', {IN=>"\n\t\n"}, {OUT=>"\n\t\n"}],
281 # Specifying two -o options should evoke a failure
282 ["o2", qw(-o x -o y), {EXIT=>2},
283 {ERR=>"foo\n"}, {ERR_SUBST => 's/^$prog: .*/foo/'}],
285 # Specifying incompatible options should evoke a failure.
286 ["incompat1", '-in', {EXIT=>2},
287 {ERR=>"$prog: options `-in' are incompatible\n"}],
288 ["incompat2", '-fR', {EXIT=>2},
289 {ERR=>"$prog: options `-fR' are incompatible\n"}],
290 ["incompat3", '-dfgiMnR', {EXIT=>2},
291 {ERR=>"$prog: options `-dfgMnR' are incompatible\n"}],
292 ["incompat4", qw(-c -o /dev/null), {EXIT=>2},
293 {ERR=>"$prog: options `-co' are incompatible\n"}],
294 ["incompat5", qw(-C -o /dev/null), {EXIT=>2},
295 {ERR=>"$prog: options `-Co' are incompatible\n"}],
296 ["incompat6", '-cC', {EXIT=>2},
297 {ERR=>"$prog: options `-cC' are incompatible\n"}],
298 ["incompat7", qw(--sort=random -n), {EXIT=>2},
299 {ERR=>"$prog: options `-nR' are incompatible\n"}],
301 # -t '\0' is accepted, as of coreutils-5.0.91
302 ['nul-tab', "-k2,2 -t '\\0'",
303 {IN=>"a\0z\01\nb\0y\02\n"}, {OUT=>"b\0y\02\na\0z\01\n"}],
305 ["bigfield", qw(-k 340282366920938463463374607431768211456),
306 {IN=>"2\n1\n"}, {OUT=>"1\n2\n"}],
308 # Using an old-style key-specifying option like +1 with an invalid
309 # ordering-option character would cause sort to try to free an invalid
310 # (non-malloc'd) pointer. This bug affects coreutils-6.5 through 6.9.
311 ['obs-inval', '+1x', {EXIT=>2},
312 {ERR=>"foo\n"}, {ERR_SUBST => 's/^$prog: .*/foo/'}],
314 # Exercise the code that enlarges the line buffer. See the thread here:
315 # http://thread.gmane.org/gmane.comp.gnu.coreutils.bugs/11006
316 ['realloc-buf', '-S1', {IN=>'a'x4000 ."\n"}, {OUT=>'a'x4000 ."\n"}],
318 ["sort-numeric", '--sort=numeric', {IN=>".01\n0\n"}, {OUT=>"0\n.01\n"}],
319 ["sort-gennum", '--sort=general-numeric',
320 {IN=>"1e2\n2e1\n"}, {OUT=>"2e1\n1e2\n"}],
323 # Add _POSIX2_VERSION=199209 to the environment of each test
324 # that uses an old-style option like +1.
325 foreach my $t (@Tests)
327 foreach my $e (@$t)
# Only non-reference (plain string) elements are checked for a "+<digit>"
# sequence.  On the first match the ENV entry is appended to the test and
# `last' ends the scan of that test, so a test gets at most one entry.
329 !ref $e && $e =~ /\+\d/
330 and push (@$t, {ENV=>'_POSIX2_VERSION=199209'}), last;
# triple_test and run_tests are not defined in the visible source;
# presumably they come from the shared test-support module -- TODO confirm.
334 @Tests = triple_test \@Tests;
# The DEBUG and VERBOSE environment variables are passed through to
# run_tests as its last two arguments.
336 my $save_temps = $ENV{DEBUG};
337 my $verbose = $ENV{VERBOSE};
# The value returned by run_tests becomes this script's exit status.
339 my $fail = run_tests ($prog, $prog, \@Tests, $save_temps, $verbose);
340 exit $fail;