Initial commit: Uploaded everything from abs/core
[arch-rock.git] / base / coreutils / coreutils-i18n.patch
blobab64bfd07d32ee750b191125d0ce2bfb5308edee
1 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
2 +++ coreutils-6.8+/tests/sort/sort-mb-tests 2007-03-01 15:08:24.000000000 +0000
3 @@ -0,0 +1,58 @@
4 +#! /bin/sh
5 +case $# in
6 + 0) xx='../../src/sort';;
7 + *) xx="$1";;
8 +esac
9 +test "$VERBOSE" && echo=echo || echo=:
10 +$echo testing program: $xx
11 +errors=0
12 +test "$srcdir" || srcdir=.
13 +test "$VERBOSE" && $xx --version 2> /dev/null
15 +export LC_ALL=en_US.UTF-8
16 +locale -k LC_CTYPE 2>&1 | grep -q charmap.*UTF-8 || exit 77
17 +errors=0
19 +$xx -t ? -k2 -n mb1.I > mb1.O
20 +code=$?
21 +if test $code != 0; then
22 + $echo "Test mb1 failed: $xx return code $code differs from expected value 0" 1>&2
23 + errors=`expr $errors + 1`
24 +else
25 + cmp mb1.O $srcdir/mb1.X > /dev/null 2>&1
26 + case $? in
27 + 0) if test "$VERBOSE"; then $echo "passed mb1"; fi;;
28 + 1) $echo "Test mb1 failed: files mb1.O and $srcdir/mb1.X differ" 1>&2
29 + (diff -c mb1.O $srcdir/mb1.X) 2> /dev/null
30 + errors=`expr $errors + 1`;;
31 + 2) $echo "Test mb1 may have failed." 1>&2
32 + $echo The command "cmp mb1.O $srcdir/mb1.X" failed. 1>&2
33 + errors=`expr $errors + 1`;;
34 + esac
35 +fi
37 +$xx -t ? -k4 -n mb2.I > mb2.O
38 +code=$?
39 +if test $code != 0; then
40 + $echo "Test mb2 failed: $xx return code $code differs from expected value 0" 1>&2
41 + errors=`expr $errors + 1`
42 +else
43 + cmp mb2.O $srcdir/mb2.X > /dev/null 2>&1
44 + case $? in
45 + 0) if test "$VERBOSE"; then $echo "passed mb2"; fi;;
46 + 1) $echo "Test mb2 failed: files mb2.O and $srcdir/mb2.X differ" 1>&2
47 + (diff -c mb2.O $srcdir/mb2.X) 2> /dev/null
48 + errors=`expr $errors + 1`;;
49 + 2) $echo "Test mb2 may have failed." 1>&2
50 + $echo The command "cmp mb2.O $srcdir/mb2.X" failed. 1>&2
51 + errors=`expr $errors + 1`;;
52 + esac
53 +fi
55 +if test $errors = 0; then # mb1 and mb2 are the only tests this script runs
56 + $echo Passed all 2 tests. 1>&2
57 +else
58 + $echo Failed $errors tests. 1>&2
59 +fi
60 +test $errors = 0 || errors=1
61 +exit $errors
62 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
63 +++ coreutils-6.8+/tests/sort/mb2.I 2007-03-01 15:08:24.000000000 +0000
64 @@ -0,0 +1,4 @@
65 +Apple???10??20
66 +Banana???5??30
67 +Citrus???20??5
68 +Cherry???30??10
69 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
70 +++ coreutils-6.8+/tests/sort/mb2.X 2007-03-01 15:08:24.000000000 +0000
71 @@ -0,0 +1,4 @@
72 +Citrus???20??5
73 +Cherry???30??10
74 +Apple???10??20
75 +Banana???5??30
76 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
77 +++ coreutils-6.8+/tests/sort/mb1.I 2007-03-01 15:08:24.000000000 +0000
78 @@ -0,0 +1,4 @@
79 +Apple?10
80 +Banana?5
81 +Citrus?20
82 +Cherry?30
83 --- /dev/null 2007-03-01 09:16:39.219409909 +0000
84 +++ coreutils-6.8+/tests/sort/mb1.X 2007-03-01 15:08:24.000000000 +0000
85 @@ -0,0 +1,4 @@
86 +Banana?5
87 +Apple?10
88 +Citrus?20
89 +Cherry?30
90 --- coreutils-6.8+/tests/sort/Makefile.am.i18n 2007-01-24 07:47:37.000000000 +0000
91 +++ coreutils-6.8+/tests/sort/Makefile.am 2007-03-01 15:09:59.000000000 +0000
92 @@ -66,15 +66,17 @@
93 bigfield.O bigfield.E
94 ##test-files-end
96 -EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen)
97 -noinst_SCRIPTS = $x-tests
98 +run_gen += mb1.O mb2.O
100 +EXTRA_DIST = Test.pm $x-tests $(explicit) $(maint_gen) mb1.I mb1.X mb2.I mb2.X
101 +noinst_SCRIPTS = $x-tests # $x-mb-tests
102 TESTS_ENVIRONMENT = \
103 CU_TEST_NAME=`basename $(abs_srcdir)`,$$tst \
104 PATH="$(VG_PATH_PREFIX)`pwd`/../../src$(PATH_SEPARATOR)$$PATH"
106 editpl = sed -e 's,@''PERL''@,$(PERL),g' -e 's,@''srcdir''@,$(srcdir),g'
108 -TESTS = $x-tests
109 +TESTS = $x-tests $x-mb-tests
111 mk_script = $(srcdir)/../mk-script
112 $(srcdir)/$x-tests: $(mk_script) Test.pm Makefile.am
113 --- coreutils-6.8+/lib/linebuffer.h.i18n 2005-05-14 07:44:24.000000000 +0100
114 +++ coreutils-6.8+/lib/linebuffer.h 2007-03-01 15:08:24.000000000 +0000
115 @@ -22,6 +22,11 @@
117 # include <stdio.h>
119 +/* Get mbstate_t. */
120 +# if HAVE_WCHAR_H
121 +# include <wchar.h>
122 +# endif
124 /* A `struct linebuffer' holds a line of text. */
126 struct linebuffer
127 @@ -29,6 +34,9 @@
128 size_t size; /* Allocated. */
129 size_t length; /* Used. */
130 char *buffer;
131 +# if HAVE_WCHAR_H
132 + mbstate_t state;
133 +# endif
136 /* Initialize linebuffer LINEBUFFER for use. */
137 --- coreutils-6.8+/src/expand.c.i18n 2007-01-14 15:41:28.000000000 +0000
138 +++ coreutils-6.8+/src/expand.c 2007-03-01 15:08:24.000000000 +0000
139 @@ -38,11 +38,28 @@
140 #include <stdio.h>
141 #include <getopt.h>
142 #include <sys/types.h>
144 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
145 +#if HAVE_WCHAR_H
146 +# include <wchar.h>
147 +#endif
149 #include "system.h"
150 #include "error.h"
151 #include "quote.h"
152 #include "xstrndup.h"
154 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
155 + installation; work around this configuration error. */
156 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
157 +# define MB_LEN_MAX 16
158 +#endif
160 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
161 +#if HAVE_MBRTOWC && defined mbstate_t
162 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
163 +#endif
165 /* The official name of this program (e.g., no `g' prefix). */
166 #define PROGRAM_NAME "expand"
168 @@ -183,6 +200,7 @@
169 stops = num_start + len - 1;
173 else
175 error (0, 0, _("tab size contains invalid character(s): %s"),
176 @@ -365,6 +383,142 @@
180 +#if HAVE_MBRTOWC
181 +static void
182 +expand_multibyte (void)
184 + FILE *fp; /* Input stream. */
185 + mbstate_t i_state; /* Current shift state of the input stream. */
186 + mbstate_t i_state_bak; /* Back up the I_STATE. */
187 + mbstate_t o_state; /* Current shift state of the output stream. */
188 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
189 + char *bufpos; /* Next read position of BUF. */
190 + size_t buflen = 0; /* The length of the byte sequence in buf. */
191 + wchar_t wc; /* A gotten wide character. */
192 + size_t mblength; /* The byte length of the multibyte character
193 + that was converted into WC. */
194 + int tab_index = 0; /* Index in `tab_list' of next tabstop. */
195 + int column = 0; /* Column on screen of the next char. */
196 + int next_tab_column; /* Column the next tab stop is on. */
197 + int convert = 1; /* If nonzero, perform translations. */
199 + fp = next_file ((FILE *) NULL);
200 + if (fp == NULL)
201 + return;
203 + memset (&o_state, '\0', sizeof(mbstate_t));
204 + memset (&i_state, '\0', sizeof(mbstate_t));
206 + for (;;)
208 + /* Refill the buffer BUF. */
209 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
211 + memmove (buf, bufpos, buflen);
212 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
213 + bufpos = buf;
216 + /* No character is left in BUF. */
217 + if (buflen < 1)
219 + fp = next_file (fp);
221 + if (fp == NULL)
222 + break; /* No more files. */
223 + else
225 + memset (&i_state, '\0', sizeof(mbstate_t));
226 + continue;
230 + /* Get a wide character. */
231 + i_state_bak = i_state;
232 + mblength = mbrtowc (&wc, bufpos, buflen, &i_state);
234 + switch (mblength)
236 + case (size_t)-1: /* illegal byte sequence. */
237 + case (size_t)-2:
238 + mblength = 1;
239 + i_state = i_state_bak;
240 + if (convert)
242 + ++column;
243 + if (convert_entire_line == 0)
244 + convert = 0;
246 + putchar (*bufpos);
247 + break;
249 + case 0: /* null. */
250 + mblength = 1;
251 + if (convert && convert_entire_line == 0)
252 + convert = 0;
253 + putchar ('\0');
254 + break;
256 + default:
257 + if (wc == L'\n') /* LF. */
259 + tab_index = 0;
260 + column = 0;
261 + convert = 1;
262 + putchar ('\n');
264 + else if (wc == L'\t' && convert) /* Tab. */
266 + if (tab_size == 0)
268 + /* Do not let tab_index == first_free_tab;
269 + stop when it is 1 less. */
270 + while (tab_index < first_free_tab - 1
271 + && column >= tab_list[tab_index])
272 + tab_index++;
273 + next_tab_column = tab_list[tab_index];
274 + if (tab_index < first_free_tab - 1)
275 + tab_index++;
276 + if (column >= next_tab_column)
277 + next_tab_column = column + 1;
279 + else
280 + next_tab_column = column + tab_size - column % tab_size;
282 + while (column < next_tab_column)
284 + putchar (' ');
285 + ++column;
288 + else /* Others. */
290 + if (convert)
292 + if (wc == L'\b')
294 + if (column > 0)
295 + --column;
297 + else
299 + int width; /* The width of WC. */
301 + width = wcwidth (wc);
302 + column += (width > 0) ? width : 0;
303 + if (convert_entire_line == 0)
304 + convert = 0;
307 + fwrite (bufpos, sizeof(char), mblength, stdout);
310 + buflen -= mblength;
311 + bufpos += mblength;
314 +#endif
317 main (int argc, char **argv)
319 @@ -429,7 +583,12 @@
321 file_list = (optind < argc ? &argv[optind] : stdin_argv);
323 - expand ();
324 +#if HAVE_MBRTOWC
325 + if (MB_CUR_MAX > 1)
326 + expand_multibyte ();
327 + else
328 +#endif
329 + expand ();
331 if (have_read_stdin && fclose (stdin) != 0)
332 error (EXIT_FAILURE, errno, "-");
333 --- coreutils-6.8+/src/join.c.i18n 2007-01-14 15:41:28.000000000 +0000
334 +++ coreutils-6.8+/src/join.c 2007-03-01 15:08:24.000000000 +0000
335 @@ -23,16 +23,30 @@
336 #include <sys/types.h>
337 #include <getopt.h>
339 +/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth(). */
340 +#if HAVE_WCHAR_H
341 +# include <wchar.h>
342 +#endif
344 +/* Get iswblank(), towupper. */
345 +#if HAVE_WCTYPE_H
346 +# include <wctype.h>
347 +#endif
349 #include "system.h"
350 #include "error.h"
351 #include "hard-locale.h"
352 #include "linebuffer.h"
353 -#include "memcasecmp.h"
354 #include "quote.h"
355 #include "stdio--.h"
356 #include "xmemcoll.h"
357 #include "xstrtol.h"
359 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
360 +#if HAVE_MBRTOWC && defined mbstate_t
361 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
362 +#endif
364 /* The official name of this program (e.g., no `g' prefix). */
365 #define PROGRAM_NAME "join"
367 @@ -104,10 +118,12 @@
368 /* Last element in `outlist', where a new element can be added. */
369 static struct outlist *outlist_end = &outlist_head;
371 -/* Tab character separating fields. If negative, fields are separated
372 - by any nonempty string of blanks, otherwise by exactly one
373 - tab character whose value (when cast to unsigned char) equals TAB. */
374 -static int tab = -1;
375 +/* Tab character separating fields. If NULL, fields are separated
376 + by any nonempty string of blanks. */
377 +static char *tab = NULL;
379 +/* The number of bytes used for tab. */
380 +static size_t tablen = 0;
382 static struct option const longopts[] =
384 @@ -190,6 +206,8 @@
386 /* Fill in the `fields' structure in LINE. */
388 +/* Single-byte version; xfields_multibyte handles multibyte locales. */
390 static void
391 xfields (struct line *line)
393 @@ -199,10 +217,11 @@
394 if (ptr == lim)
395 return;
397 - if (0 <= tab)
398 + if (tab != NULL)
400 + unsigned char t = tab[0];
401 char *sep;
402 - for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
403 + for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
404 extract_field (line, ptr, sep - ptr);
406 else
407 @@ -229,6 +248,148 @@
408 extract_field (line, ptr, lim - ptr);
411 +#if HAVE_MBRTOWC
412 +static void
413 +xfields_multibyte (struct line *line)
415 + char *ptr = line->buf.buffer;
416 + char const *lim = ptr + line->buf.length - 1;
417 + wchar_t wc = 0;
418 + size_t mblength = 1;
419 + mbstate_t state, state_bak;
421 + memset (&state, 0, sizeof (mbstate_t));
423 + if (ptr == lim)
424 + return;
426 + if (tab != NULL)
428 + unsigned char t = tab[0];
429 + char *sep = ptr;
430 + for (; ptr < lim; ptr = sep + mblength)
432 + sep = ptr;
433 + while (sep < lim)
435 + state_bak = state;
436 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
438 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
440 + mblength = 1;
441 + state = state_bak;
443 + mblength = (mblength < 1) ? 1 : mblength;
445 + if (mblength == tablen && !memcmp (sep, tab, mblength))
446 + break;
447 + else
449 + sep += mblength;
450 + continue;
454 + if (sep == lim)
455 + break;
457 + extract_field (line, ptr, sep - ptr);
460 + else
462 + /* Skip leading blanks before the first field. */
463 + while(ptr < lim)
465 + state_bak = state;
466 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
468 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
470 + mblength = 1;
471 + state = state_bak;
472 + break;
474 + mblength = (mblength < 1) ? 1 : mblength;
476 + if (!iswblank(wc))
477 + break;
478 + ptr += mblength;
481 + do
483 + char *sep;
484 + state_bak = state;
485 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
486 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
488 + mblength = 1;
489 + state = state_bak;
490 + break;
492 + mblength = (mblength < 1) ? 1 : mblength;
494 + sep = ptr + mblength;
495 + while (sep != lim)
497 + state_bak = state;
498 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
499 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
501 + mblength = 1;
502 + state = state_bak;
503 + break;
505 + mblength = (mblength < 1) ? 1 : mblength;
507 + if (iswblank (wc))
508 + break;
510 + sep += mblength;
513 + extract_field (line, ptr, sep - ptr);
514 + if (sep == lim)
515 + return;
517 + state_bak = state;
518 + mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
519 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
521 + mblength = 1;
522 + state = state_bak;
523 + break;
525 + mblength = (mblength < 1) ? 1 : mblength;
527 + ptr = sep + mblength;
528 + while (ptr != lim)
530 + state_bak = state;
531 + mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
532 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
534 + mblength = 1;
535 + state = state_bak;
536 + break;
538 + mblength = (mblength < 1) ? 1 : mblength;
540 + if (!iswblank (wc))
541 + break;
543 + ptr += mblength;
546 + while (ptr != lim);
549 + extract_field (line, ptr, lim - ptr);
551 +#endif
553 /* Read a line from FP into LINE and split it into fields.
554 Return true if successful. */
556 @@ -249,6 +410,11 @@
557 line->nfields_allocated = 0;
558 line->nfields = 0;
559 line->fields = NULL;
560 +#if HAVE_MBRTOWC
561 + if (MB_CUR_MAX > 1)
562 + xfields_multibyte (line);
563 + else
564 +#endif
565 xfields (line);
566 return true;
568 @@ -303,56 +469,114 @@
569 keycmp (struct line const *line1, struct line const *line2)
571 /* Start of field to compare in each file. */
572 - char *beg1;
573 - char *beg2;
575 - size_t len1;
576 - size_t len2; /* Length of fields to compare. */
577 + char *beg[2];
578 + char *copy[2];
579 + size_t len[2]; /* Length of fields to compare. */
580 int diff;
581 + int i, j;
583 if (join_field_1 < line1->nfields)
585 - beg1 = line1->fields[join_field_1].beg;
586 - len1 = line1->fields[join_field_1].len;
587 + beg[0] = line1->fields[join_field_1].beg;
588 + len[0] = line1->fields[join_field_1].len;
590 else
592 - beg1 = NULL;
593 - len1 = 0;
594 + beg[0] = NULL;
595 + len[0] = 0;
598 if (join_field_2 < line2->nfields)
600 - beg2 = line2->fields[join_field_2].beg;
601 - len2 = line2->fields[join_field_2].len;
602 + beg[1] = line2->fields[join_field_2].beg;
603 + len[1] = line2->fields[join_field_2].len;
605 else
607 - beg2 = NULL;
608 - len2 = 0;
609 + beg[1] = NULL;
610 + len[1] = 0;
613 - if (len1 == 0)
614 - return len2 == 0 ? 0 : -1;
615 - if (len2 == 0)
616 + if (len[0] == 0)
617 + return len[1] == 0 ? 0 : -1;
618 + if (len[1] == 0)
619 return 1;
621 if (ignore_case)
623 - /* FIXME: ignore_case does not work with NLS (in particular,
624 - with multibyte chars). */
625 - diff = memcasecmp (beg1, beg2, MIN (len1, len2));
626 +#ifdef HAVE_MBRTOWC
627 + if (MB_CUR_MAX > 1)
629 + size_t mblength;
630 + wchar_t wc, uwc;
631 + mbstate_t state, state_bak;
633 + memset (&state, '\0', sizeof (mbstate_t));
635 + for (i = 0; i < 2; i++)
637 + copy[i] = alloca (len[i] + 1);
639 + for (j = 0; j < MIN (len[0], len[1]);)
641 + state_bak = state;
642 + mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
644 + switch (mblength)
646 + case (size_t) -1:
647 + case (size_t) -2:
648 + state = state_bak;
649 + /* Fall through */
650 + case 0:
651 + mblength = 1;
652 + break;
654 + default:
655 + uwc = towupper (wc);
657 + if (uwc != wc)
659 + mbstate_t state_wc;
661 + memset (&state_wc, '\0', sizeof (mbstate_t));
662 + wcrtomb (copy[i] + j, uwc, &state_wc);
664 + else
665 + memcpy (copy[i] + j, beg[i] + j, mblength);
667 + j += mblength;
669 + copy[i][j] = '\0';
672 + else
673 +#endif
675 + for (i = 0; i < 2; i++)
677 + copy[i] = alloca (len[i] + 1);
679 + for (j = 0; j < MIN (len[0], len[1]); j++)
680 + copy[i][j] = toupper (beg[i][j]);
682 + copy[i][j] = '\0';
686 else
688 - if (hard_LC_COLLATE)
689 - return xmemcoll (beg1, len1, beg2, len2);
690 - diff = memcmp (beg1, beg2, MIN (len1, len2));
691 + copy[0] = (unsigned char *) beg[0];
692 + copy[1] = (unsigned char *) beg[1];
695 + if (hard_LC_COLLATE)
696 + return xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
697 + diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
699 if (diff)
700 return diff;
701 - return len1 < len2 ? -1 : len1 != len2;
702 + return len[0] - len[1];
705 /* Print field N of LINE if it exists and is nonempty, otherwise
706 @@ -377,11 +601,18 @@
708 /* Print the join of LINE1 and LINE2. */
710 +#define PUT_TAB_CHAR \
711 + do \
712 + { \
713 + (tab != NULL) ? \
714 + fwrite(tab, sizeof(char), tablen, stdout) : putchar (' '); \
715 + } \
716 + while (0)
718 static void
719 prjoin (struct line const *line1, struct line const *line2)
721 const struct outlist *outlist;
722 - char output_separator = tab < 0 ? ' ' : tab;
724 outlist = outlist_head.next;
725 if (outlist)
726 @@ -397,12 +628,12 @@
727 if (o->file == 0)
729 if (line1 == &uni_blank)
732 line = line2;
733 field = join_field_2;
735 else
738 line = line1;
739 field = join_field_1;
741 @@ -416,7 +647,7 @@
742 o = o->next;
743 if (o == NULL)
744 break;
745 - putchar (output_separator);
746 + PUT_TAB_CHAR;
748 putchar ('\n');
750 @@ -434,23 +665,23 @@
751 prfield (join_field_1, line1);
752 for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
754 - putchar (output_separator);
755 + PUT_TAB_CHAR;
756 prfield (i, line1);
758 for (i = join_field_1 + 1; i < line1->nfields; ++i)
760 - putchar (output_separator);
761 + PUT_TAB_CHAR;
762 prfield (i, line1);
765 for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
767 - putchar (output_separator);
768 + PUT_TAB_CHAR;
769 prfield (i, line2);
771 for (i = join_field_2 + 1; i < line2->nfields; ++i)
773 - putchar (output_separator);
774 + PUT_TAB_CHAR;
775 prfield (i, line2);
777 putchar ('\n');
778 @@ -859,20 +1090,41 @@
780 case 't':
782 - unsigned char newtab = optarg[0];
783 - if (! newtab)
784 + char *newtab;
785 + size_t newtablen;
786 + if (! optarg[0])
787 error (EXIT_FAILURE, 0, _("empty tab"));
788 - if (optarg[1])
789 + newtab = xstrdup (optarg);
790 +#if HAVE_MBRTOWC
791 + if (MB_CUR_MAX > 1)
793 + mbstate_t state;
795 + memset (&state, 0, sizeof (mbstate_t));
796 + newtablen = mbrtowc (NULL, newtab,
797 + strnlen (newtab, MB_LEN_MAX),
798 + &state);
799 + if (newtablen == (size_t) 0
800 + || newtablen == (size_t) -1
801 + || newtablen == (size_t) -2)
802 + newtablen = 1;
804 + else
805 +#endif
806 + newtablen = 1;
808 + if (newtablen == 1 && newtab[1])
810 + if (STREQ (newtab, "\\0"))
811 + newtab[0] = '\0';
813 + if (tab != NULL && strcmp (tab, newtab))
815 - if (STREQ (optarg, "\\0"))
816 - newtab = '\0';
817 - else
818 - error (EXIT_FAILURE, 0, _("multi-character tab %s"),
819 - quote (optarg));
820 + free (newtab);
821 + error (EXIT_FAILURE, 0, _("incompatible tabs"));
823 - if (0 <= tab && tab != newtab)
824 - error (EXIT_FAILURE, 0, _("incompatible tabs"));
825 tab = newtab;
826 + tablen = newtablen;
828 break;
830 --- coreutils-6.8+/src/uniq.c.i18n 2007-01-14 15:41:28.000000000 +0000
831 +++ coreutils-6.8+/src/uniq.c 2007-03-01 15:08:24.000000000 +0000
832 @@ -23,6 +23,16 @@
833 #include <getopt.h>
834 #include <sys/types.h>
836 +/* Get mbstate_t, mbrtowc(). */
837 +#if HAVE_WCHAR_H
838 +# include <wchar.h>
839 +#endif
841 +/* Get isw* functions. */
842 +#if HAVE_WCTYPE_H
843 +# include <wctype.h>
844 +#endif
846 #include "system.h"
847 #include "argmatch.h"
848 #include "linebuffer.h"
849 @@ -32,7 +42,19 @@
850 #include "quote.h"
851 #include "xmemcoll.h"
852 #include "xstrtol.h"
853 -#include "memcasecmp.h"
854 +#include "xmemcoll.h"
856 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
857 + installation; work around this configuration error. */
858 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
859 +# define MB_LEN_MAX 16
860 +#endif
862 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
863 +#if HAVE_MBRTOWC && defined mbstate_t
864 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
865 +#endif
868 /* The official name of this program (e.g., no `g' prefix). */
869 #define PROGRAM_NAME "uniq"
870 @@ -109,6 +131,10 @@
871 /* Select whether/how to delimit groups of duplicate lines. */
872 static enum delimit_method delimit_groups;
874 +/* Function pointers. */
875 +static char *
876 +(*find_field) (struct linebuffer *line);
878 static struct option const longopts[] =
880 {"count", no_argument, NULL, 'c'},
881 @@ -198,7 +224,7 @@
882 return a pointer to the beginning of the line's field to be compared. */
884 static char *
885 -find_field (const struct linebuffer *line)
886 +find_field_uni (struct linebuffer *line)
888 size_t count;
889 char *lp = line->buffer;
890 @@ -219,6 +245,83 @@
891 return lp + i;
894 +#if HAVE_MBRTOWC
896 +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL) \
897 + do \
898 + { \
899 + mbstate_t state_bak; \
901 + CONVFAIL = 0; \
902 + state_bak = *STATEP; \
904 + MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP); \
906 + switch (MBLENGTH) \
907 + { \
908 + case (size_t)-2: \
909 + case (size_t)-1: \
910 + *STATEP = state_bak; \
911 + CONVFAIL++; \
912 + /* Fall through */ \
913 + case 0: \
914 + MBLENGTH = 1; \
915 + } \
916 + } \
917 + while (0)
919 +static char *
920 +find_field_multi (struct linebuffer *line)
922 + size_t count;
923 + char *lp = line->buffer;
924 + size_t size = line->length - 1;
925 + size_t pos;
926 + size_t mblength;
927 + wchar_t wc;
928 + mbstate_t *statep;
929 + int convfail;
931 + pos = 0;
932 + statep = &(line->state);
934 + /* skip fields. */
935 + for (count = 0; count < skip_fields && pos < size; count++)
937 + while (pos < size)
939 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
941 + if (convfail || !iswblank (wc))
943 + pos += mblength;
944 + break;
946 + pos += mblength;
949 + while (pos < size)
951 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
953 + if (!convfail && iswblank (wc))
954 + break;
956 + pos += mblength;
960 + /* skip chars. */
961 + for (count = 0; count < skip_chars && pos < size; count++)
963 + MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);
964 + pos += mblength;
967 + return lp + pos;
969 +#endif
971 /* Return false if two strings OLD and NEW match, true if not.
972 OLD and NEW point not to the beginnings of the lines
973 but rather to the beginnings of the fields to compare.
974 @@ -227,6 +330,8 @@
975 static bool
976 different (char *old, char *new, size_t oldlen, size_t newlen)
978 + char *copy_old, *copy_new;
980 if (check_chars < oldlen)
981 oldlen = check_chars;
982 if (check_chars < newlen)
983 @@ -234,14 +339,92 @@
985 if (ignore_case)
987 - /* FIXME: This should invoke strcoll somehow. */
988 - return oldlen != newlen || memcasecmp (old, new, oldlen);
989 + size_t i;
991 + copy_old = alloca (oldlen + 1);
992 + copy_new = alloca (oldlen + 1);
994 + for (i = 0; i < oldlen; i++)
996 + copy_old[i] = toupper (old[i]);
997 + copy_new[i] = toupper (new[i]);
1000 - else if (hard_LC_COLLATE)
1001 - return xmemcoll (old, oldlen, new, newlen) != 0;
1002 else
1003 - return oldlen != newlen || memcmp (old, new, oldlen);
1005 + copy_old = (char *)old;
1006 + copy_new = (char *)new;
1009 + return xmemcoll (copy_old, oldlen, copy_new, newlen);
1012 +#if HAVE_MBRTOWC
1013 +static int
1014 +different_multi (const char *old, const char *new, size_t oldlen, size_t newlen, mbstate_t oldstate, mbstate_t newstate)
1016 + size_t i, j, chars;
1017 + const char *str[2];
1018 + char *copy[2];
1019 + size_t len[2];
1020 + mbstate_t state[2];
1021 + size_t mblength;
1022 + wchar_t wc, uwc;
1023 + mbstate_t state_bak;
1025 + str[0] = old;
1026 + str[1] = new;
1027 + len[0] = oldlen;
1028 + len[1] = newlen;
1029 + state[0] = oldstate;
1030 + state[1] = newstate;
1032 + for (i = 0; i < 2; i++)
1034 + copy[i] = alloca (len[i] + 1);
1036 + for (j = 0, chars = 0; j < len[i] && chars < check_chars; chars++)
1038 + state_bak = state[i];
1039 + mblength = mbrtowc (&wc, str[i] + j, len[i] - j, &(state[i]));
1041 + switch (mblength)
1043 + case (size_t)-1:
1044 + case (size_t)-2:
1045 + state[i] = state_bak;
1046 + /* Fall through */
1047 + case 0:
1048 + mblength = 1;
1049 + break;
1051 + default:
1052 + if (ignore_case)
1054 + uwc = towupper (wc);
1056 + if (uwc != wc)
1058 + mbstate_t state_wc;
1060 + memset (&state_wc, '\0', sizeof(mbstate_t));
1061 + wcrtomb (copy[i] + j, uwc, &state_wc);
1063 + else
1064 + memcpy (copy[i] + j, str[i] + j, mblength);
1066 + else
1067 + memcpy (copy[i] + j, str[i] + j, mblength);
1069 + j += mblength;
1071 + copy[i][j] = '\0';
1072 + len[i] = j;
1075 + return xmemcoll (copy[0], len[0], copy[1], len[1]);
1077 +#endif
1079 /* Output the line in linebuffer LINE to standard output
1080 provided that the switches say it should be output.
1081 @@ -295,15 +478,43 @@
1083 char *prevfield IF_LINT (= NULL);
1084 size_t prevlen IF_LINT (= 0);
1085 +#if HAVE_MBRTOWC
1086 + mbstate_t prevstate;
1088 + memset (&prevstate, '\0', sizeof (mbstate_t));
1089 +#endif
1091 while (!feof (stdin))
1093 char *thisfield;
1094 size_t thislen;
1095 +#if HAVE_MBRTOWC
1096 + mbstate_t thisstate;
1097 +#endif
1099 if (readlinebuffer (thisline, stdin) == 0)
1100 break;
1101 thisfield = find_field (thisline);
1102 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1103 +#if HAVE_MBRTOWC
1104 + if (MB_CUR_MAX > 1)
1106 + thisstate = thisline->state;
1108 + if (prevline->length == 0 || different_multi
1109 + (thisfield, prevfield, thislen, prevlen, thisstate, prevstate))
1111 + fwrite (thisline->buffer, sizeof (char),
1112 + thisline->length, stdout);
1114 + SWAP_LINES (prevline, thisline);
1115 + prevfield = thisfield;
1116 + prevlen = thislen;
1117 + prevstate = thisstate;
1120 + else
1121 +#endif
1122 if (prevline->length == 0
1123 || different (thisfield, prevfield, thislen, prevlen))
1125 @@ -322,17 +533,26 @@
1126 size_t prevlen;
1127 uintmax_t match_count = 0;
1128 bool first_delimiter = true;
1129 +#if HAVE_MBRTOWC
1130 + mbstate_t prevstate;
1131 +#endif
1133 if (readlinebuffer (prevline, stdin) == 0)
1134 goto closefiles;
1135 prevfield = find_field (prevline);
1136 prevlen = prevline->length - 1 - (prevfield - prevline->buffer);
1137 +#if HAVE_MBRTOWC
1138 + prevstate = prevline->state;
1139 +#endif
1141 while (!feof (stdin))
1143 bool match;
1144 char *thisfield;
1145 size_t thislen;
1146 +#if HAVE_MBRTOWC
1147 + mbstate_t thisstate;
1148 +#endif
1149 if (readlinebuffer (thisline, stdin) == 0)
1151 if (ferror (stdin))
1152 @@ -341,6 +561,15 @@
1154 thisfield = find_field (thisline);
1155 thislen = thisline->length - 1 - (thisfield - thisline->buffer);
1156 +#if HAVE_MBRTOWC
1157 + if (MB_CUR_MAX > 1)
1159 + thisstate = thisline->state;
1160 + match = !different_multi (thisfield, prevfield,
1161 + thislen, prevlen, thisstate, prevstate);
1163 + else
1164 +#endif
1165 match = !different (thisfield, prevfield, thislen, prevlen);
1166 match_count += match;
1168 @@ -373,6 +602,9 @@
1169 SWAP_LINES (prevline, thisline);
1170 prevfield = thisfield;
1171 prevlen = thislen;
1172 +#if HAVE_MBRTOWC
1173 + prevstate = thisstate;
1174 +#endif
1175 if (!match)
1176 match_count = 0;
1178 @@ -417,6 +649,19 @@
1180 atexit (close_stdout);
1182 +#if HAVE_MBRTOWC
1183 + if (MB_CUR_MAX > 1)
1185 + find_field = find_field_multi;
1187 + else
1188 +#endif
1190 + find_field = find_field_uni;
1195 skip_chars = 0;
1196 skip_fields = 0;
1197 check_chars = SIZE_MAX;
1198 --- coreutils-6.8+/src/fold.c.i18n 2007-02-23 12:01:47.000000000 +0000
1199 +++ coreutils-6.8+/src/fold.c 2007-03-01 15:08:24.000000000 +0000
1200 @@ -23,11 +23,33 @@
1201 #include <getopt.h>
1202 #include <sys/types.h>
1204 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
1205 +#if HAVE_WCHAR_H
1206 +# include <wchar.h>
1207 +#endif
1209 +/* Get iswprint(), iswblank(). */
1210 +#if HAVE_WCTYPE_H
1211 +# include <wctype.h>
1212 +#endif
1214 #include "system.h"
1215 #include "error.h"
1216 #include "quote.h"
1217 #include "xstrtol.h"
1219 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
1220 + installation; work around this configuration error. */
1221 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
1222 +# undef MB_LEN_MAX
1223 +# define MB_LEN_MAX 16
1224 +#endif
1226 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
1227 +#if HAVE_MBRTOWC && defined mbstate_t
1228 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
1229 +#endif
1231 #define TAB_WIDTH 8
1233 /* The official name of this program (e.g., no `g' prefix). */
1234 @@ -35,23 +57,44 @@
1236 #define AUTHORS "David MacKenzie"
1238 +#define FATAL_ERROR(Message) \
1239 + do \
1240 + { \
1241 + error (0, 0, (Message)); \
1242 + usage (2); \
1243 + } \
1244 + while (0)
1246 +enum operating_mode
1248 + /* Fold texts by columns that are at the given positions. */
1249 + column_mode,
1251 + /* Fold texts by bytes that are at the given positions. */
1252 + byte_mode,
1254 + /* Fold texts by characters that are at the given positions. */
1255 + character_mode,
1258 /* The name this program was run with. */
1259 char *program_name;
1261 +/* The current operating mode. (Default: column_mode) */
1262 +static enum operating_mode operating_mode;
1264 /* If nonzero, try to break on whitespace. */
1265 static bool break_spaces;
1267 -/* If nonzero, count bytes, not column positions. */
1268 -static bool count_bytes;
1270 /* If nonzero, at least one of the files we read was standard input. */
1271 static bool have_read_stdin;
1273 -static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
1274 +static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
1276 static struct option const longopts[] =
1278 {"bytes", no_argument, NULL, 'b'},
1279 + {"characters", no_argument, NULL, 'c'},
1280 {"spaces", no_argument, NULL, 's'},
1281 {"width", required_argument, NULL, 'w'},
1282 {GETOPT_HELP_OPTION_DECL},
1283 @@ -81,6 +124,7 @@
1284 "), stdout);
1285 fputs (_("\
1286 -b, --bytes count bytes rather than columns\n\
1287 + -c, --characters count characters rather than columns\n\
1288 -s, --spaces break at spaces\n\
1289 -w, --width=WIDTH use WIDTH columns instead of 80\n\
1290 "), stdout);
1291 @@ -98,7 +142,7 @@
1292 static size_t
1293 adjust_column (size_t column, char c)
1295 - if (!count_bytes)
1296 + if (operating_mode != byte_mode)
1298 if (c == '\b')
1300 @@ -121,30 +165,14 @@
1301 to stdout, with maximum line length WIDTH.
1302 Return true if successful. */
1304 -static bool
1305 -fold_file (char const *filename, size_t width)
1306 +static void
1307 +fold_text (FILE *istream, size_t width, int *saved_errno)
1309 - FILE *istream;
1310 int c;
1311 size_t column = 0; /* Screen column where next char will go. */
1312 size_t offset_out = 0; /* Index in `line_out' for next char. */
1313 static char *line_out = NULL;
1314 static size_t allocated_out = 0;
1315 - int saved_errno;
1317 - if (STREQ (filename, "-"))
1319 - istream = stdin;
1320 - have_read_stdin = true;
1322 - else
1323 - istream = fopen (filename, "r");
1325 - if (istream == NULL)
1327 - error (0, errno, "%s", filename);
1328 - return false;
1331 while ((c = getc (istream)) != EOF)
1333 @@ -172,6 +200,15 @@
1334 bool found_blank = false;
1335 size_t logical_end = offset_out;
1337 + /* If LINE_OUT is empty, store the character
1338 + in LINE_OUT anyway, even though the column
1339 + is bigger than the width. */
1340 + if (offset_out == 0)
1342 + line_out[offset_out++] = c;
1343 + continue;
1346 /* Look for the last blank. */
1347 while (logical_end)
1349 @@ -218,11 +255,225 @@
1350 line_out[offset_out++] = c;
1353 - saved_errno = errno;
1354 + *saved_errno = errno;
1356 + if (offset_out)
1357 + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1359 + /* LINE_OUT is static and is reused by later calls, so it must not be freed. */
1362 +#if HAVE_MBRTOWC
1363 +static void
1364 +fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
1366 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
1367 + size_t buflen = 0; /* The length of the byte sequence in buf. */
1368 + char *bufpos = buf; /* Next read position of BUF; set before the first memmove reads it. */
1369 + wint_t wc; /* The wide character just converted. */
1370 + size_t mblength; /* The byte length of the multibyte character
1371 + that was converted to WC. */
1372 + mbstate_t state, state_bak; /* State of the stream. */
1373 + int convfail; /* 1 when the conversion failed, otherwise 0. */
1375 + char *line_out = NULL;
1376 + size_t offset_out = 0; /* Index in `line_out' for next char. */
1377 + size_t allocated_out = 0;
1379 + int increment;
1380 + size_t column = 0;
1382 + size_t last_blank_pos;
1383 + size_t last_blank_column;
1384 + int is_blank_seen;
1385 + int last_blank_increment;
1386 + int is_bs_following_last_blank;
1387 + size_t bs_following_last_blank_num;
1388 + int is_cr_after_last_blank;
1390 +#define CLEAR_FLAGS \
1391 + do \
1392 + { \
1393 + last_blank_pos = 0; \
1394 + last_blank_column = 0; \
1395 + is_blank_seen = 0; \
1396 + is_bs_following_last_blank = 0; \
1397 + bs_following_last_blank_num = 0; \
1398 + is_cr_after_last_blank = 0; \
1399 + } \
1400 + while (0)
1402 +#define START_NEW_LINE \
1403 + do \
1404 + { \
1405 + putchar ('\n'); \
1406 + column = 0; \
1407 + offset_out = 0; \
1408 + CLEAR_FLAGS; \
1409 + } \
1410 + while (0)
1412 + CLEAR_FLAGS;
1413 + memset (&state, '\0', sizeof(mbstate_t));
1415 + for (;; bufpos += mblength, buflen -= mblength)
1417 + if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
1419 + memmove (buf, bufpos, buflen);
1420 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
1421 + bufpos = buf;
1424 + if (buflen < 1)
1425 + break;
1427 + /* Get a wide character. */
1428 + convfail = 0;
1429 + state_bak = state;
1430 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
1432 + switch (mblength)
1434 + case (size_t)-1:
1435 + case (size_t)-2:
1436 + convfail++;
1437 + state = state_bak;
1438 + /* Fall through. */
1440 + case 0:
1441 + mblength = 1;
1442 + break;
1445 +rescan:
1446 + if (operating_mode == byte_mode) /* byte mode */
1447 + increment = mblength;
1448 + else if (operating_mode == character_mode) /* character mode */
1449 + increment = 1;
1450 + else /* column mode */
1452 + if (convfail)
1453 + increment = 1;
1454 + else
1456 + switch (wc)
1458 + case L'\n':
1459 + fwrite (line_out, sizeof(char), offset_out, stdout);
1460 + START_NEW_LINE;
1461 + continue;
1463 + case L'\b':
1464 + increment = (column > 0) ? -1 : 0;
1465 + break;
1467 + case L'\r':
1468 + increment = -1 * column;
1469 + break;
1471 + case L'\t':
1472 + increment = 8 - column % 8;
1473 + break;
1475 + default:
1476 + increment = wcwidth (wc);
1477 + increment = (increment < 0) ? 0 : increment;
1482 + if (column + increment > width && break_spaces && last_blank_pos)
1484 + fwrite (line_out, sizeof(char), last_blank_pos, stdout);
1485 + putchar ('\n');
1487 + offset_out = offset_out - last_blank_pos;
1488 + column = column - last_blank_column + ((is_cr_after_last_blank)
1489 + ? last_blank_increment : bs_following_last_blank_num);
1490 + memmove (line_out, line_out + last_blank_pos, offset_out);
1491 + CLEAR_FLAGS;
1492 + goto rescan;
1495 + if (column + increment > width && column != 0)
1497 + fwrite (line_out, sizeof(char), offset_out, stdout);
1498 + START_NEW_LINE;
1499 + goto rescan;
1502 + if (allocated_out < offset_out + mblength)
1504 + allocated_out += 1024;
1505 + line_out = xrealloc (line_out, allocated_out);
1508 + memcpy (line_out + offset_out, bufpos, mblength);
1509 + offset_out += mblength;
1510 + column += increment;
1512 + if (is_blank_seen && !convfail && wc == L'\r')
1513 + is_cr_after_last_blank = 1;
1515 + if (is_bs_following_last_blank && !convfail && wc == L'\b')
1516 + ++bs_following_last_blank_num;
1517 + else
1518 + is_bs_following_last_blank = 0;
1520 + if (break_spaces && !convfail && iswblank (wc))
1522 + last_blank_pos = offset_out;
1523 + last_blank_column = column;
1524 + is_blank_seen = 1;
1525 + last_blank_increment = increment;
1526 + is_bs_following_last_blank = 1;
1527 + bs_following_last_blank_num = 0;
1528 + is_cr_after_last_blank = 0;
1532 + *saved_errno = errno;
1534 if (offset_out)
1535 fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
1537 + free(line_out);
1539 +#endif
1541 +/* Fold file FILENAME, or standard input if FILENAME is "-",
1542 + to stdout, with maximum line length WIDTH.
1543 + Return true if successful, false if an error occurs. */
1545 +static bool
1546 +fold_file (char const *filename, size_t width)
1548 + FILE *istream;
1549 + int saved_errno;
1551 + if (STREQ (filename, "-"))
1553 + istream = stdin;
1554 + have_read_stdin = true;
1556 + else
1557 + istream = fopen (filename, "r");
1559 + if (istream == NULL)
1561 + error (0, errno, "%s", filename);
1562 + return false; /* Open failure is an error, not success. */
1565 + /* Decide how ISTREAM is to be folded. */
1566 +#if HAVE_MBRTOWC
1567 + if (MB_CUR_MAX > 1)
1568 + fold_multibyte_text (istream, width, &saved_errno);
1569 + else
1570 +#endif
1571 + fold_text (istream, width, &saved_errno);
1573 if (ferror (istream))
1575 error (0, saved_errno, "%s", filename);
1576 @@ -255,7 +506,8 @@
1578 atexit (close_stdout);
1580 - break_spaces = count_bytes = have_read_stdin = false;
1581 + operating_mode = column_mode;
1582 + break_spaces = have_read_stdin = false;
1584 while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
1586 @@ -264,7 +516,15 @@
1587 switch (optc)
1589 case 'b': /* Count bytes rather than columns. */
1590 - count_bytes = true;
1591 + if (operating_mode != column_mode)
1592 + FATAL_ERROR (_("only one way of folding may be specified"));
1593 + operating_mode = byte_mode;
1594 + break;
1596 + case 'c':
1597 + if (operating_mode != column_mode)
1598 + FATAL_ERROR (_("only one way of folding may be specified"));
1599 + operating_mode = character_mode;
1600 break;
1602 case 's': /* Break at word boundaries. */
1603 --- coreutils-6.8+/src/sort.c.i18n 2007-02-24 11:23:23.000000000 +0000
1604 +++ coreutils-6.8+/src/sort.c 2007-03-01 15:10:57.000000000 +0000
1605 @@ -23,10 +23,19 @@
1607 #include <config.h>
1609 +#include <assert.h>
1610 #include <getopt.h>
1611 #include <sys/types.h>
1612 #include <sys/wait.h>
1613 #include <signal.h>
1614 +#if HAVE_WCHAR_H
1615 +# include <wchar.h>
1616 +#endif
1617 +/* Get isw* functions. */
1618 +#if HAVE_WCTYPE_H
1619 +# include <wctype.h>
1620 +#endif
1622 #include "system.h"
1623 #include "argmatch.h"
1624 #include "error.h"
1625 @@ -116,14 +125,38 @@
1626 /* Thousands separator; if -1, then there isn't one. */
1627 static int thousands_sep;
1629 +static int force_general_numcompare = 0;
1631 /* Nonzero if the corresponding locales are hard. */
1632 static bool hard_LC_COLLATE;
1633 -#if HAVE_NL_LANGINFO
1634 +#if HAVE_LANGINFO_CODESET
1635 static bool hard_LC_TIME;
1636 #endif
1638 #define NONZERO(x) ((x) != 0)
1640 +/* Set MBLENGTH to the byte length of the multibyte character at PTR; 1 if it is invalid, incomplete or NUL. */
1641 +#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
1642 + do \
1643 + { \
1644 + wchar_t wc; \
1645 + mbstate_t state_bak; \
1647 + state_bak = STATE; \
1648 + MBLENGTH = mbrtowc (&wc, PTR, (LIM) - (PTR), &STATE); \
1650 + switch (MBLENGTH) \
1651 + { \
1652 + case (size_t)-1: \
1653 + case (size_t)-2: \
1654 + STATE = state_bak; \
1655 + /* Fall through. */ \
1656 + case 0: \
1657 + MBLENGTH = 1; \
1658 + } \
1659 + } \
1660 + while (0)
1662 /* The kind of blanks for '-b' to skip in various options. */
1663 enum blanktype { bl_start, bl_end, bl_both };
1665 @@ -261,13 +294,11 @@
1666 they were read if all keys compare equal. */
1667 static bool stable;
1669 -/* If TAB has this value, blanks separate fields. */
1670 -enum { TAB_DEFAULT = CHAR_MAX + 1 };
1672 -/* Tab character separating fields. If TAB_DEFAULT, then fields are
1673 +/* Tab character separating fields. If tab_length is 0, then fields are
1674 separated by the empty string between a non-blank character and a blank
1675 character. */
1676 -static int tab = TAB_DEFAULT;
1677 +static char tab[MB_LEN_MAX + 1];
1678 +static size_t tab_length = 0;
1680 /* Flag to remove consecutive duplicate lines from the output.
1681 Only the last of a sequence of equal lines will be output. */
1682 @@ -639,6 +670,44 @@
1683 update_proc (pid);
1686 +/* Function pointers. */
1687 +static void
1688 +(*inittables) (void);
1689 +static char *
1690 +(*begfield) (const struct line*, const struct keyfield *);
1691 +static char *
1692 +(*limfield) (const struct line*, const struct keyfield *);
1693 +static int
1694 +(*getmonth) (char const *, size_t);
1695 +static int
1696 +(*keycompare) (const struct line *, const struct line *);
1697 +static int
1698 +(*numcompare) (const char *, const char *);
1700 +/* Test whether STR starts with a blank multibyte character.
1701 + Set *LENGTH to the byte length of the examined multibyte character. */
1702 +#if HAVE_MBRTOWC
1703 +static int
1704 +ismbblank (const char *str, size_t len, size_t *length)
1706 + size_t mblength;
1707 + wchar_t wc;
1708 + mbstate_t state;
1710 + memset (&state, '\0', sizeof(mbstate_t));
1711 + mblength = mbrtowc (&wc, str, len, &state);
1713 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
1715 + *length = 1;
1716 + return 0;
1719 + *length = (mblength < 1) ? 1 : mblength;
1720 + return iswblank (wc);
1722 +#endif
1724 /* Clean up any remaining temporary files. */
1726 static void
1727 @@ -978,7 +1047,7 @@
1728 free (node);
1731 -#if HAVE_NL_LANGINFO
1732 +#if HAVE_LANGINFO_CODESET
1734 static int
1735 struct_month_cmp (const void *m1, const void *m2)
1736 @@ -993,7 +1062,7 @@
1737 /* Initialize the character class tables. */
1739 static void
1740 -inittables (void)
1741 +inittables_uni (void)
1743 size_t i;
1745 @@ -1005,7 +1074,7 @@
1746 fold_toupper[i] = toupper (i);
1749 -#if HAVE_NL_LANGINFO
1750 +#if HAVE_LANGINFO_CODESET
1751 /* If we're not in the "C" locale, read different names for months. */
1752 if (hard_LC_TIME)
1754 @@ -1031,6 +1100,64 @@
1755 #endif
1758 +#if HAVE_MBRTOWC
1759 +static void
1760 +inittables_mb (void)
1762 + int i, j, k, l;
1763 + char *name, *s;
1764 + size_t s_len, mblength;
1765 + char mbc[MB_LEN_MAX];
1766 + wchar_t wc, pwc;
1767 + mbstate_t state_mb, state_wc;
1769 + for (i = 0; i < MONTHS_PER_YEAR; i++)
1771 + s = (char *) nl_langinfo (ABMON_1 + i);
1772 + s_len = strlen (s);
1773 + monthtab[i].name = name = (char *) xmalloc (s_len + 1);
1774 + monthtab[i].val = i + 1;
1776 + memset (&state_mb, '\0', sizeof (mbstate_t));
1777 + memset (&state_wc, '\0', sizeof (mbstate_t));
1779 + for (j = 0; j < s_len;)
1781 + if (!ismbblank (s + j, s_len - j, &mblength))
1782 + break;
1783 + j += mblength;
1786 + for (k = 0; j < s_len;)
1788 + mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
1789 + assert (mblength != (size_t)-1 && mblength != (size_t)-2);
1790 + if (mblength == 0)
1791 + break;
1793 + pwc = towupper (wc);
1794 + if (pwc == wc)
1796 + memcpy (mbc, s + j, mblength);
1797 + j += mblength;
1799 + else
1801 + j += mblength;
1802 + mblength = wcrtomb (mbc, pwc, &state_wc);
1803 + assert (mblength != (size_t)0 && mblength != (size_t)-1);
1806 + for (l = 0; l < mblength; l++)
1807 + name[k++] = mbc[l];
1809 + name[k] = '\0';
1811 + qsort ((void *) monthtab, MONTHS_PER_YEAR,
1812 + sizeof (struct month), struct_month_cmp);
1814 +#endif
1816 /* Specify the amount of main memory to use when sorting. */
1817 static void
1818 specify_sort_size (char const *s)
1819 @@ -1241,7 +1368,7 @@
1820 by KEY in LINE. */
1822 static char *
1823 -begfield (const struct line *line, const struct keyfield *key)
1824 +begfield_uni (const struct line *line, const struct keyfield *key)
1826 char *ptr = line->text, *lim = ptr + line->length - 1;
1827 size_t sword = key->sword;
1828 @@ -1251,10 +1378,10 @@
1829 /* The leading field separator itself is included in a field when -t
1830 is absent. */
1832 - if (tab != TAB_DEFAULT)
1833 + if (tab_length)
1834 while (ptr < lim && sword--)
1836 - while (ptr < lim && *ptr != tab)
1837 + while (ptr < lim && *ptr != tab[0])
1838 ++ptr;
1839 if (ptr < lim)
1840 ++ptr;
1841 @@ -1282,11 +1409,70 @@
1842 return ptr;
1845 +#if HAVE_MBRTOWC
1846 +static char *
1847 +begfield_mb (const struct line *line, const struct keyfield *key)
1849 + int i;
1850 + char *ptr = line->text, *lim = ptr + line->length - 1;
1851 + size_t sword = key->sword;
1852 + size_t schar = key->schar;
1853 + size_t mblength;
1854 + mbstate_t state;
1856 + memset (&state, '\0', sizeof(mbstate_t));
1858 + if (tab_length)
1859 + while (ptr < lim && sword--)
1861 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1863 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1864 + ptr += mblength;
1866 + if (ptr < lim)
1868 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1869 + ptr += mblength;
1872 + else
1873 + while (ptr < lim && sword--)
1875 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1876 + ptr += mblength;
1877 + if (ptr < lim)
1879 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1880 + ptr += mblength;
1882 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1883 + ptr += mblength;
1886 + if (key->skipsblanks)
1887 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1888 + ptr += mblength;
1890 + for (i = 0; i < schar; i++)
1892 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1894 + if (ptr + mblength > lim)
1895 + break;
1896 + else
1897 + ptr += mblength;
1900 + return ptr;
1902 +#endif
1904 /* Return the limit of (a pointer to the first character after) the field
1905 in LINE specified by KEY. */
1907 static char *
1908 -limfield (const struct line *line, const struct keyfield *key)
1909 +limfield_uni (const struct line *line, const struct keyfield *key)
1911 char *ptr = line->text, *lim = ptr + line->length - 1;
1912 size_t eword = key->eword, echar = key->echar;
1913 @@ -1299,10 +1485,10 @@
1914 `beginning' is the first character following the delimiting TAB.
1915 Otherwise, leave PTR pointing at the first `blank' character after
1916 the preceding field. */
1917 - if (tab != TAB_DEFAULT)
1918 + if (tab_length)
1919 while (ptr < lim && eword--)
1921 - while (ptr < lim && *ptr != tab)
1922 + while (ptr < lim && *ptr != tab[0])
1923 ++ptr;
1924 if (ptr < lim && (eword | echar))
1925 ++ptr;
1926 @@ -1348,10 +1534,10 @@
1929 /* Make LIM point to the end of (one byte past) the current field. */
1930 - if (tab != TAB_DEFAULT)
1931 + if (tab_length)
1933 char *newlim;
1934 - newlim = memchr (ptr, tab, lim - ptr);
1935 + newlim = memchr (ptr, tab[0], lim - ptr);
1936 if (newlim)
1937 lim = newlim;
1939 @@ -1384,6 +1570,107 @@
1940 return ptr;
1943 +#if HAVE_MBRTOWC
1944 +static char *
1945 +limfield_mb (const struct line *line, const struct keyfield *key)
1947 + char *ptr = line->text, *lim = ptr + line->length - 1;
1948 + size_t eword = key->eword, echar = key->echar;
1949 + int i;
1950 + size_t mblength;
1951 + mbstate_t state;
1953 + memset (&state, '\0', sizeof(mbstate_t));
1955 + if (tab_length)
1956 + while (ptr < lim && eword--)
1958 + while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1960 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1961 + ptr += mblength;
1963 + if (ptr < lim && (eword | echar))
1965 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1966 + ptr += mblength;
1969 + else
1970 + while (ptr < lim && eword--)
1972 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1973 + ptr += mblength;
1974 + if (ptr < lim)
1976 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1977 + ptr += mblength;
1979 + while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1980 + ptr += mblength;
1984 +# ifdef POSIX_UNSPECIFIED
1985 + /* Make LIM point to the end of (one byte past) the current field. */
1986 + if (tab_length)
1988 + char *newlim, *p;
1990 + newlim = NULL;
1991 + for (p = ptr; p < lim;)
1993 + if (memcmp (p, tab, tab_length) == 0)
1995 + newlim = p;
1996 + break;
1999 + GET_BYTELEN_OF_CHAR (lim, p, mblength, state); /* Measure the character at P, the scan cursor. */
2000 + p += mblength;
2003 + else
2005 + char *newlim;
2006 + newlim = ptr;
2008 + while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
2009 + newlim += mblength;
2010 + if (ptr < lim)
2012 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2013 + ptr += mblength;
2015 + while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
2016 + newlim += mblength;
2017 + lim = newlim;
2019 +# endif
2021 + /* If we're skipping leading blanks, don't start counting characters
2022 + * until after skipping past any leading blanks. */
2023 + if (key->skipsblanks)
2024 + while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
2025 + ptr += mblength;
2027 + memset (&state, '\0', sizeof(mbstate_t));
2029 + /* Advance PTR by ECHAR (if possible), but no further than LIM. */
2030 + for (i = 0; i < echar; i++)
2032 + GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
2034 + if (ptr + mblength > lim)
2035 + break;
2036 + else
2037 + ptr += mblength;
2040 + return ptr;
2042 +#endif
2044 /* Fill BUF reading from FP, moving buf->left bytes from the end
2045 of buf->buf to the beginning first. If EOF is reached and the
2046 file wasn't terminated by a newline, supply one. Set up BUF's line
2047 @@ -1466,8 +1753,24 @@
2048 else
2050 if (key->skipsblanks)
2051 - while (blanks[to_uchar (*line_start)])
2052 - line_start++;
2054 +#if HAVE_MBRTOWC
2055 + if (MB_CUR_MAX > 1)
2057 + size_t mblength;
2058 + mbstate_t state;
2059 + memset (&state, '\0', sizeof(mbstate_t));
2060 + while (line_start < line->keylim &&
2061 + ismbblank (line_start,
2062 + line->keylim - line_start,
2063 + &mblength))
2064 + line_start += mblength;
2066 + else
2067 +#endif
2068 + while (blanks[to_uchar (*line_start)])
2069 + line_start++;
2071 line->keybeg = line_start;
2074 @@ -1500,7 +1803,7 @@
2075 hideously fast. */
2077 static int
2078 -numcompare (const char *a, const char *b)
2079 +numcompare_uni (const char *a, const char *b)
2081 while (blanks[to_uchar (*a)])
2082 a++;
2083 @@ -1510,6 +1813,25 @@
2084 return strnumcmp (a, b, decimal_point, thousands_sep);
2087 +#if HAVE_MBRTOWC
2088 +static int
2089 +numcompare_mb (const char *a, const char *b)
2091 + size_t mblength, len;
2092 + len = strlen (a); /* okay for UTF-8 */
2093 + while (*a && ismbblank (a, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2095 + a += mblength;
2096 + len -= mblength;
2098 + len = strlen (b); /* okay for UTF-8 */
2099 + while (*b && ismbblank (b, len > MB_CUR_MAX ? MB_CUR_MAX : len, &mblength))
2100 + b += mblength;
2102 + return strnumcmp (a, b, decimal_point, thousands_sep);
2104 +#endif /* HAVE_MBRTOWC */
2106 static int
2107 general_numcompare (const char *sa, const char *sb)
2109 @@ -1543,7 +1865,7 @@
2110 Return 0 if the name in S is not recognized. */
2112 static int
2113 -getmonth (char const *month, size_t len)
2114 +getmonth_uni (char const *month, size_t len)
2116 size_t lo = 0;
2117 size_t hi = MONTHS_PER_YEAR;
2118 @@ -1698,11 +2020,79 @@
2119 return diff;
2122 +#if HAVE_MBRTOWC
2123 +static int
2124 +getmonth_mb (const char *s, size_t len)
2126 + char *month;
2127 + register size_t i;
2128 + register int lo = 0, hi = MONTHS_PER_YEAR, result;
2129 + char *tmp;
2130 + size_t wclength, mblength;
2131 + const char **pp;
2132 + const wchar_t **wpp;
2133 + wchar_t *month_wcs;
2134 + mbstate_t state;
2136 + while (len > 0 && ismbblank (s, len, &mblength))
2138 + s += mblength;
2139 + len -= mblength;
2142 + if (len == 0)
2143 + return 0;
2145 + month = (char *) alloca (len + 1);
2147 + tmp = (char *) alloca (len + 1);
2148 + memcpy (tmp, s, len);
2149 + tmp[len] = '\0';
2150 + pp = (const char **)&tmp;
2151 + month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
2152 + memset (&state, '\0', sizeof(mbstate_t));
2154 + wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
2155 + assert (wclength != (size_t)-1 && *pp == NULL);
2157 + for (i = 0; i < wclength; i++)
2159 + month_wcs[i] = towupper(month_wcs[i]);
2160 + if (iswblank (month_wcs[i]))
2162 + month_wcs[i] = L'\0';
2163 + break;
2167 + wpp = (const wchar_t **)&month_wcs;
2169 + mblength = wcsrtombs (month, wpp, len + 1, &state);
2170 + assert (mblength != (-1) && *wpp == NULL);
2172 + do
2174 + int ix = (lo + hi) / 2;
2176 + if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
2177 + hi = ix;
2178 + else
2179 + lo = ix;
2181 + while (hi - lo > 1);
2183 + result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
2184 + ? monthtab[lo].val : 0);
2186 + return result;
2188 +#endif
2190 /* Compare two lines A and B trying every key in sequence until there
2191 are no more keys or a difference is found. */
2193 static int
2194 -keycompare (const struct line *a, const struct line *b)
2195 +keycompare_uni (const struct line *a, const struct line *b)
2197 struct keyfield const *key = keylist;
2199 @@ -1875,6 +2265,177 @@
2200 return key->reverse ? -diff : diff;
2203 +#if HAVE_MBRTOWC
2204 +static int
2205 +keycompare_mb (const struct line *a, const struct line *b)
2207 + struct keyfield *key = keylist;
2209 + /* For the first iteration only, the key positions have been
2210 + precomputed for us. */
2211 + char *texta = a->keybeg;
2212 + char *textb = b->keybeg;
2213 + char *lima = a->keylim;
2214 + char *limb = b->keylim;
2216 + size_t mblength_a, mblength_b;
2217 + wchar_t wc_a, wc_b;
2218 + mbstate_t state_a, state_b;
2220 + int diff;
2222 + memset (&state_a, '\0', sizeof(mbstate_t));
2223 + memset (&state_b, '\0', sizeof(mbstate_t));
2225 + for (;;)
2227 + unsigned char *translate = (unsigned char *) key->translate;
2228 + bool const *ignore = key->ignore;
2230 + /* Find the lengths. */
2231 + size_t lena = lima <= texta ? 0 : lima - texta;
2232 + size_t lenb = limb <= textb ? 0 : limb - textb;
2234 + /* Actually compare the fields. */
2235 + if (key->numeric | key->general_numeric)
2237 + char savea = *lima, saveb = *limb;
2239 + *lima = *limb = '\0';
2240 + if (force_general_numcompare)
2241 + diff = general_numcompare (texta, textb);
2242 + else
2243 + diff = ((key->numeric ? numcompare : general_numcompare)
2244 + (texta, textb));
2245 + *lima = savea, *limb = saveb;
2247 + else if (key->month)
2248 + diff = getmonth (texta, lena) - getmonth (textb, lenb);
2249 + else
2251 + if (ignore || translate)
2253 + char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
2254 + char *copy_b = copy_a + lena + 1;
2255 + size_t new_len_a, new_len_b;
2256 + size_t i, j;
2258 + /* Ignore and/or translate chars before comparing. */
2259 +# define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
2260 + do \
2261 + { \
2262 + wchar_t uwc; \
2263 + char mbc[MB_LEN_MAX]; \
2264 + mbstate_t state_wc; \
2266 + for (NEW_LEN = i = 0; i < LEN;) \
2267 + { \
2268 + mbstate_t state_bak; \
2270 + state_bak = STATE; \
2271 + MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
2273 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
2274 + || MBLENGTH == 0) \
2275 + { \
2276 + if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
2277 + STATE = state_bak; \
2278 + if (!ignore) \
2279 + COPY[NEW_LEN++] = TEXT[i]; \
2280 + i++; continue; /* Always step past the byte, or the loop never ends when IGNORE is set. */ \
2281 + } \
2283 + if (ignore) \
2284 + { \
2285 + if ((ignore == nonprinting && !iswprint (WC)) \
2286 + || (ignore == nondictionary \
2287 + && !iswalnum (WC) && !iswblank (WC))) \
2288 + { \
2289 + i += MBLENGTH; \
2290 + continue; \
2291 + } \
2292 + } \
2294 + if (translate) \
2295 + { \
2297 + uwc = towupper(WC); \
2298 + if (WC == uwc) \
2299 + { \
2300 + memcpy (mbc, TEXT + i, MBLENGTH); \
2301 + i += MBLENGTH; \
2302 + } \
2303 + else \
2304 + { \
2305 + i += MBLENGTH; \
2306 + WC = uwc; \
2307 + memset (&state_wc, '\0', sizeof (mbstate_t)); \
2309 + MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
2310 + assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
2311 + } \
2313 + for (j = 0; j < MBLENGTH; j++) \
2314 + COPY[NEW_LEN++] = mbc[j]; \
2315 + } \
2316 + else \
2317 + for (j = 0; j < MBLENGTH; j++) \
2318 + COPY[NEW_LEN++] = TEXT[i++]; \
2319 + } \
2320 + COPY[NEW_LEN] = '\0'; \
2321 + } \
2322 + while (0)
2323 + IGNORE_CHARS (new_len_a, lena, texta, copy_a,
2324 + wc_a, mblength_a, state_a);
2325 + IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
2326 + wc_b, mblength_b, state_b);
2327 + diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
2329 + else if (lena == 0)
2330 + diff = - NONZERO (lenb);
2331 + else if (lenb == 0)
2332 + goto greater;
2333 + else
2334 + diff = xmemcoll (texta, lena, textb, lenb);
2337 + if (diff)
2338 + goto not_equal;
2340 + key = key->next;
2341 + if (! key)
2342 + break;
2344 + /* Find the beginning and limit of the next field. */
2345 + if (key->eword != -1)
2346 + lima = limfield (a, key), limb = limfield (b, key);
2347 + else
2348 + lima = a->text + a->length - 1, limb = b->text + b->length - 1;
2350 + if (key->sword != -1)
2351 + texta = begfield (a, key), textb = begfield (b, key);
2352 + else
2354 + texta = a->text, textb = b->text;
2355 + if (key->skipsblanks)
2357 + while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
2358 + texta += mblength_a;
2359 + while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
2360 + textb += mblength_b;
2365 + return 0;
2367 +greater:
2368 + diff = 1;
2369 +not_equal:
2370 + return key->reverse ? -diff : diff;
2372 +#endif
2374 /* Compare two lines A and B, returning negative, zero, or positive
2375 depending on whether A compares less than, equal to, or greater than B. */
2377 @@ -2744,7 +3305,7 @@
2378 initialize_exit_failure (SORT_FAILURE);
2380 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2381 -#if HAVE_NL_LANGINFO
2382 +#if HAVE_LANGINFO_CODESET
2383 hard_LC_TIME = hard_locale (LC_TIME);
2384 #endif
2386 @@ -2765,6 +3326,27 @@
2387 thousands_sep = -1;
2390 +#if HAVE_MBRTOWC
2391 + if (MB_CUR_MAX > 1)
2393 + inittables = inittables_mb;
2394 + begfield = begfield_mb;
2395 + limfield = limfield_mb;
2396 + getmonth = getmonth_mb;
2397 + keycompare = keycompare_mb;
2398 + numcompare = numcompare_mb;
2400 + else
2401 +#endif
2403 + inittables = inittables_uni;
2404 + begfield = begfield_uni;
2405 + limfield = limfield_uni;
2406 + getmonth = getmonth_uni;
2407 + keycompare = keycompare_uni;
2408 + numcompare = numcompare_uni;
2411 have_read_stdin = false;
2412 inittables ();
2414 @@ -3015,13 +3597,35 @@
2416 case 't':
2418 - char newtab = optarg[0];
2419 - if (! newtab)
2420 + char newtab[MB_LEN_MAX + 1];
2421 + size_t newtab_length = 1;
2422 + strncpy (newtab, optarg, MB_LEN_MAX);
2423 + if (! newtab[0])
2424 error (SORT_FAILURE, 0, _("empty tab"));
2425 - if (optarg[1])
2426 +#if HAVE_MBRTOWC
2427 + if (MB_CUR_MAX > 1)
2429 + wchar_t wc;
2430 + mbstate_t state;
2431 + size_t i;
2433 + memset (&state, '\0', sizeof (mbstate_t));
2434 + newtab_length = mbrtowc (&wc, newtab, strnlen (newtab,
2435 + MB_LEN_MAX),
2436 + &state);
2437 + switch (newtab_length)
2439 + case (size_t) -1:
2440 + case (size_t) -2:
2441 + case 0:
2442 + newtab_length = 1;
2445 +#endif
2446 + if (newtab_length == 1 && optarg[1])
2448 if (STREQ (optarg, "\\0"))
2449 - newtab = '\0';
2450 + newtab[0] = '\0';
2451 else
2453 /* Provoke with `sort -txx'. Complain about
2454 @@ -3032,9 +3636,12 @@
2455 quote (optarg));
2458 - if (tab != TAB_DEFAULT && tab != newtab)
2459 + if (tab_length
2460 + && (tab_length != newtab_length
2461 + || memcmp (tab, newtab, tab_length) != 0))
2462 error (SORT_FAILURE, 0, _("incompatible tabs"));
2463 - tab = newtab;
2464 + memcpy (tab, newtab, newtab_length);
2465 + tab_length = newtab_length;
2467 break;
2469 --- coreutils-6.8+/src/unexpand.c.i18n 2007-01-14 15:41:28.000000000 +0000
2470 +++ coreutils-6.8+/src/unexpand.c 2007-03-01 15:08:24.000000000 +0000
2471 @@ -39,11 +39,28 @@
2472 #include <stdio.h>
2473 #include <getopt.h>
2474 #include <sys/types.h>
2476 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
2477 +#if HAVE_WCHAR_H
2478 +# include <wchar.h>
2479 +#endif
2481 #include "system.h"
2482 #include "error.h"
2483 #include "quote.h"
2484 #include "xstrndup.h"
2486 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
2487 + installation; work around this configuration error. */
2488 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
2489 +# define MB_LEN_MAX 16
2490 +#endif
2492 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
2493 +#if HAVE_MBRTOWC && defined mbstate_t
2494 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
2495 +#endif
2497 /* The official name of this program (e.g., no `g' prefix). */
2498 #define PROGRAM_NAME "unexpand"
2500 @@ -110,6 +127,208 @@
2501 {NULL, 0, NULL, 0}
2504 +static FILE *next_file (FILE *fp);
2506 +#if HAVE_MBRTOWC
2507 +static void
2508 +unexpand_multibyte (void)
2510 + FILE *fp; /* Input stream. */
2511 + mbstate_t i_state; /* Current shift state of the input stream. */
2512 + mbstate_t i_state_bak; /* Back up the I_STATE. */
2513 + mbstate_t o_state; /* Current shift state of the output stream. */
2514 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
2515 + char *bufpos = buf; /* Next read position of BUF; set before the first memmove reads it. */
2516 + size_t buflen = 0; /* The length of the byte sequence in buf. */
2517 + wint_t wc; /* The wide character just converted. */
2518 + size_t mblength; /* The byte length of the multibyte character
2519 + that was converted to WC. */
2521 + /* Index in `tab_list' of next tabstop: */
2522 + int tab_index = 0; /* For calculating width of pending tabs. */
2523 + int print_tab_index = 0; /* For printing as many tabs as possible. */
2524 + unsigned int column = 0; /* Column on screen of next char. */
2525 + int next_tab_column; /* Column the next tab stop is on. */
2526 + int convert = 1; /* If nonzero, perform translations. */
2527 + unsigned int pending = 0; /* Pending columns of blanks. */
2529 + fp = next_file ((FILE *) NULL);
2530 + if (fp == NULL)
2531 + return;
2533 + memset (&o_state, '\0', sizeof(mbstate_t));
2534 + memset (&i_state, '\0', sizeof(mbstate_t));
2536 + for (;;)
2538 + if (buflen < MB_LEN_MAX && !feof(fp) && !ferror(fp))
2540 + memmove (buf, bufpos, buflen);
2541 + buflen += fread (buf + buflen, sizeof(char), BUFSIZ, fp);
2542 + bufpos = buf;
2545 + /* Get a wide character. */
2546 + if (buflen < 1)
2548 + mblength = 1;
2549 + wc = WEOF;
2551 + else
2553 + i_state_bak = i_state;
2554 + mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &i_state);
2557 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2559 + i_state = i_state_bak;
2560 + wc = L'\0';
2563 + if (wc == L' ' && convert && column < INT_MAX)
2565 + ++pending;
2566 + ++column;
2568 + else if (wc == L'\t' && convert)
2570 + if (tab_size == 0)
2572 + /* Do not let tab_index == first_free_tab;
2573 + stop when it is 1 less. */
2574 + while (tab_index < first_free_tab - 1
2575 + && column >= tab_list[tab_index])
2576 + tab_index++;
2577 + next_tab_column = tab_list[tab_index];
2578 + if (tab_index < first_free_tab - 1)
2579 + tab_index++;
2580 + if (column >= next_tab_column)
2582 + convert = 0; /* Ran out of tab stops. */
2583 + goto flush_pend_mb;
2586 + else
2588 + next_tab_column = column + tab_size - column % tab_size;
2590 + pending += next_tab_column - column;
2591 + column = next_tab_column;
2593 + else
2595 +flush_pend_mb:
2596 + /* Flush pending spaces. Print as many tabs as possible,
2597 + then print the rest as spaces. */
2598 + if (pending == 1)
2600 + putchar (' ');
2601 + pending = 0;
2603 + column -= pending;
2604 + while (pending > 0)
2606 + if (tab_size == 0)
2608 + /* Do not let print_tab_index == first_free_tab;
2609 + stop when it is 1 less. */
2610 + while (print_tab_index < first_free_tab - 1
2611 + && column >= tab_list[print_tab_index])
2612 + print_tab_index++;
2613 + next_tab_column = tab_list[print_tab_index];
2614 + if (print_tab_index < first_free_tab - 1)
2615 + print_tab_index++;
2617 + else
2619 + next_tab_column =
2620 + column + tab_size - column % tab_size;
2622 + if (next_tab_column - column <= pending)
2624 + putchar ('\t');
2625 + pending -= next_tab_column - column;
2626 + column = next_tab_column;
2628 + else
2630 + --print_tab_index;
2631 + column += pending;
2632 + while (pending != 0)
2634 + putchar (' ');
2635 + pending--;
2640 + if (wc == WEOF)
2642 + fp = next_file (fp);
2643 + if (fp == NULL)
2644 + break; /* No more files. */
2645 + else
2647 + memset (&i_state, '\0', sizeof(mbstate_t));
2648 + continue;
2652 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2654 + if (convert)
2656 + ++column;
2657 + if (convert_entire_line == 0)
2658 + convert = 0;
2660 + mblength = 1;
2661 + putchar (buf[0]);
2663 + else if (mblength == 0)
2665 + if (convert && convert_entire_line == 0)
2666 + convert = 0;
2667 + mblength = 1;
2668 + putchar ('\0');
2670 + else
2672 + if (convert)
2674 + if (wc == L'\b')
2676 + if (column > 0)
2677 + --column;
2679 + else
2681 + int width; /* The width of WC. */
2683 + width = wcwidth (wc);
2684 + column += (width > 0) ? width : 0;
2685 + if (convert_entire_line == 0)
2686 + convert = 0;
2690 + if (wc == L'\n')
2692 + tab_index = print_tab_index = 0;
2693 + column = pending = 0;
2694 + convert = 1;
2696 + fwrite (bufpos, sizeof(char), mblength, stdout);
2699 + buflen -= mblength;
2700 + bufpos += mblength;
2703 +#endif
2706 void
2707 usage (int status)
2709 @@ -531,7 +750,12 @@
2711 file_list = (optind < argc ? &argv[optind] : stdin_argv);
2713 - unexpand ();
2714 +#if HAVE_MBRTOWC
2715 + if (MB_CUR_MAX > 1)
2716 + unexpand_multibyte ();
2717 + else
2718 +#endif
2719 + unexpand ();
2721 if (have_read_stdin && fclose (stdin) != 0)
2722 error (EXIT_FAILURE, errno, "-");
2723 --- coreutils-6.8+/src/pr.c.i18n 2007-01-14 15:41:28.000000000 +0000
2724 +++ coreutils-6.8+/src/pr.c 2007-03-01 15:08:24.000000000 +0000
2725 @@ -313,6 +313,32 @@
2727 #include <getopt.h>
2728 #include <sys/types.h>
2730 +/* Get MB_LEN_MAX. */
2731 +#include <limits.h>
2732 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
2733 + installation; work around this configuration error. */
2734 +#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
2735 +# define MB_LEN_MAX 16
2736 +#endif
2738 +/* Get MB_CUR_MAX. */
2739 +#include <stdlib.h>
2741 +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
2742 +/* Get mbstate_t, mbrtowc(), wcwidth(). */
2743 +#if HAVE_WCHAR_H
2744 +# include <wchar.h>
2745 +#endif
2747 +/* Get iswprint(). -- for wcwidth(). */
2748 +#if HAVE_WCTYPE_H
2749 +# include <wctype.h>
2750 +#endif
2751 +#if !defined iswprint && !HAVE_ISWPRINT
2752 +# define iswprint(wc) 1
2753 +#endif
2755 #include "system.h"
2756 #include "error.h"
2757 #include "hard-locale.h"
2758 @@ -324,6 +350,18 @@
2759 #include "strftime.h"
2760 #include "xstrtol.h"
2762 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
2763 +#if HAVE_MBRTOWC && defined mbstate_t
2764 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
2765 +#endif
2767 +#ifndef HAVE_DECL_WCWIDTH
2768 +"this configure-time declaration test was not run"
2769 +#endif
2770 +#if !HAVE_DECL_WCWIDTH
2771 +extern int wcwidth ();
2772 +#endif
2774 /* The official name of this program (e.g., no `g' prefix). */
2775 #define PROGRAM_NAME "pr"
2777 @@ -416,7 +454,20 @@
2779 #define NULLCOL (COLUMN *)0
2781 -static int char_to_clump (char c);
2782 +/* Function pointers to switch functions for single byte locale or for
2783 + multibyte locale. If multibyte functions do not exist in your system,
2784 + these pointers always point to the functions for single byte locale. */
2785 +static void (*print_char) (char c);
2786 +static int (*char_to_clump) (char c);
2788 +/* Functions for single byte locale. */
2789 +static void print_char_single (char c);
2790 +static int char_to_clump_single (char c);
2792 +/* Functions for multibyte locale. */
2793 +static void print_char_multi (char c);
2794 +static int char_to_clump_multi (char c);
2796 static bool read_line (COLUMN *p);
2797 static bool print_page (void);
2798 static bool print_stored (COLUMN *p);
2799 @@ -426,6 +477,7 @@
2800 static void pad_across_to (int position);
2801 static void add_line_number (COLUMN *p);
2802 static void getoptarg (char *arg, char switch_char, char *character,
2803 + int *character_length, int *character_width,
2804 int *number);
2805 void usage (int status);
2806 static void print_files (int number_of_files, char **av);
2807 @@ -440,7 +492,6 @@
2808 static void pad_down (int lines);
2809 static void read_rest_of_line (COLUMN *p);
2810 static void skip_read (COLUMN *p, int column_number);
2811 -static void print_char (char c);
2812 static void cleanup (void);
2813 static void print_sep_string (void);
2814 static void separator_string (const char *optarg_S);
2815 @@ -455,7 +506,7 @@
2816 we store the leftmost columns contiguously in buff.
2817 To print a line from buff, get the index of the first character
2818 from line_vector[i], and print up to line_vector[i + 1]. */
2819 -static char *buff;
2820 +static unsigned char *buff;
2822 /* Index of the position in buff where the next character
2823 will be stored. */
2824 @@ -559,7 +610,7 @@
2825 static bool untabify_input = false;
2827 /* (-e) The input tab character. */
2828 -static char input_tab_char = '\t';
2829 +static char input_tab_char[MB_LEN_MAX] = "\t";
2831 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
2832 where the leftmost column is 1. */
2833 @@ -569,7 +620,10 @@
2834 static bool tabify_output = false;
2836 /* (-i) The output tab character. */
2837 -static char output_tab_char = '\t';
2838 +static char output_tab_char[MB_LEN_MAX] = "\t";
2840 +/* (-i) The byte length of output tab character. */
2841 +static int output_tab_char_length = 1;
2843 /* (-i) The width of the output tab. */
2844 static int chars_per_output_tab = 8;
2845 @@ -643,7 +697,13 @@
2846 static bool numbered_lines = false;
2848 /* (-n) Character which follows each line number. */
2849 -static char number_separator = '\t';
2850 +static char number_separator[MB_LEN_MAX] = "\t";
2852 +/* (-n) The byte length of the character which follows each line number. */
2853 +static int number_separator_length = 1;
2855 +/* (-n) The character width of the character which follows each line number. */
2856 +static int number_separator_width = 0;
2858 /* (-n) line counting starts with 1st line of input file (not with 1st
2859 line of 1st page printed). */
2860 @@ -696,6 +756,7 @@
2861 -a|COLUMN|-m is a `space' and with the -J option a `tab'. */
2862 static char *col_sep_string = "";
2863 static int col_sep_length = 0;
2864 +static int col_sep_width = 0;
2865 static char *column_separator = " ";
2866 static char *line_separator = "\t";
2868 @@ -852,6 +913,13 @@
2869 col_sep_length = (int) strlen (optarg_S);
2870 col_sep_string = xmalloc (col_sep_length + 1);
2871 strcpy (col_sep_string, optarg_S);
2873 +#if HAVE_MBRTOWC
2874 + if (MB_CUR_MAX > 1)
2875 + col_sep_width = mbswidth (col_sep_string, 0);
2876 + else
2877 +#endif
2878 + col_sep_width = col_sep_length;
2882 @@ -877,6 +945,21 @@
2884 atexit (close_stdout);
2886 +/* Define which functions are used, the ones for single byte locale or the ones
2887 + for multibyte locale. */
2888 +#if HAVE_MBRTOWC
2889 + if (MB_CUR_MAX > 1)
2891 + print_char = print_char_multi;
2892 + char_to_clump = char_to_clump_multi;
2894 + else
2895 +#endif
2897 + print_char = print_char_single;
2898 + char_to_clump = char_to_clump_single;
2901 n_files = 0;
2902 file_names = (argc > 1
2903 ? xmalloc ((argc - 1) * sizeof (char *))
2904 @@ -949,8 +1032,12 @@
2905 break;
2906 case 'e':
2907 if (optarg)
2908 - getoptarg (optarg, 'e', &input_tab_char,
2909 - &chars_per_input_tab);
2911 + int dummy_length, dummy_width;
2913 + getoptarg (optarg, 'e', input_tab_char, &dummy_length,
2914 + &dummy_width, &chars_per_input_tab);
2916 /* Could check tab width > 0. */
2917 untabify_input = true;
2918 break;
2919 @@ -963,8 +1050,12 @@
2920 break;
2921 case 'i':
2922 if (optarg)
2923 - getoptarg (optarg, 'i', &output_tab_char,
2924 - &chars_per_output_tab);
2926 + int dummy_width;
2928 + getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
2929 + &dummy_width, &chars_per_output_tab);
2931 /* Could check tab width > 0. */
2932 tabify_output = true;
2933 break;
2934 @@ -991,8 +1082,8 @@
2935 case 'n':
2936 numbered_lines = true;
2937 if (optarg)
2938 - getoptarg (optarg, 'n', &number_separator,
2939 - &chars_per_number);
2940 + getoptarg (optarg, 'n', number_separator, &number_separator_length,
2941 + &number_separator_width, &chars_per_number);
2942 break;
2943 case 'N':
2944 skip_count = false;
2945 @@ -1031,7 +1122,7 @@
2946 old_s = false;
2947 /* Reset an additional input of -s, -S dominates -s */
2948 col_sep_string = "";
2949 - col_sep_length = 0;
2950 + col_sep_length = col_sep_width = 0;
2951 use_col_separator = true;
2952 if (optarg)
2953 separator_string (optarg);
2954 @@ -1188,10 +1279,45 @@
2955 a number. */
2957 static void
2958 -getoptarg (char *arg, char switch_char, char *character, int *number)
2959 +getoptarg (char *arg, char switch_char, char *character, int *character_length,
2960 + int *character_width, int *number)
2962 if (!ISDIGIT (*arg))
2963 - *character = *arg++;
2965 +#ifdef HAVE_MBRTOWC
2966 + if (MB_CUR_MAX > 1) /* for multibyte locale. */
2968 + wchar_t wc;
2969 + size_t mblength;
2970 + int width;
2971 + mbstate_t state = {'\0'};
2973 + mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
2975 + if (mblength == (size_t)-1 || mblength == (size_t)-2)
2977 + *character_length = 1;
2978 + *character_width = 1;
2980 + else
2982 + *character_length = (mblength < 1) ? 1 : mblength;
2983 + width = wcwidth (wc);
2984 + *character_width = (width < 0) ? 0 : width;
2987 + strncpy (character, arg, *character_length);
2988 + arg += *character_length;
2990 + else /* for single byte locale. */
2991 +#endif
2993 + *character = *arg++;
2994 + *character_length = 1;
2995 + *character_width = 1;
2999 if (*arg)
3001 long int tmp_long;
3002 @@ -1256,7 +1382,7 @@
3003 else
3004 col_sep_string = column_separator;
3006 - col_sep_length = 1;
3007 + col_sep_length = col_sep_width = 1;
3008 use_col_separator = true;
3010 /* It's rather pointless to define a TAB separator with column
3011 @@ -1288,11 +1414,11 @@
3012 TAB_WIDTH (chars_per_input_tab, chars_per_number); */
3014 /* Estimate chars_per_text without any margin and keep it constant. */
3015 - if (number_separator == '\t')
3016 + if (number_separator[0] == '\t')
3017 number_width = chars_per_number +
3018 TAB_WIDTH (chars_per_default_tab, chars_per_number);
3019 else
3020 - number_width = chars_per_number + 1;
3021 + number_width = chars_per_number + number_separator_width;
3023 /* The number is part of the column width unless we are
3024 printing files in parallel. */
3025 @@ -1307,7 +1433,7 @@
3028 chars_per_column = (chars_per_line - chars_used_by_number -
3029 - (columns - 1) * col_sep_length) / columns;
3030 + (columns - 1) * col_sep_width) / columns;
3032 if (chars_per_column < 1)
3033 error (EXIT_FAILURE, 0, _("page width too narrow"));
3034 @@ -1432,7 +1558,7 @@
3036 /* Enlarge p->start_position of first column to use the same form of
3037 padding_not_printed with all columns. */
3038 - h = h + col_sep_length;
3039 + h = h + col_sep_width;
3041 /* This loop takes care of all but the rightmost column. */
3043 @@ -1466,7 +1592,7 @@
3045 else
3047 - h = h_next + col_sep_length;
3048 + h = h_next + col_sep_width;
3049 h_next = h + chars_per_column;
3052 @@ -1756,9 +1882,9 @@
3053 align_column (COLUMN *p)
3055 padding_not_printed = p->start_position;
3056 - if (padding_not_printed - col_sep_length > 0)
3057 + if (padding_not_printed - col_sep_width > 0)
3059 - pad_across_to (padding_not_printed - col_sep_length);
3060 + pad_across_to (padding_not_printed - col_sep_width);
3061 padding_not_printed = ANYWHERE;
3064 @@ -2029,13 +2155,13 @@
3065 /* May be too generous. */
3066 buff = X2REALLOC (buff, &buff_allocated);
3068 - buff[buff_current++] = c;
3069 + buff[buff_current++] = (unsigned char) c;
3072 static void
3073 add_line_number (COLUMN *p)
3075 - int i;
3076 + int i, j;
3077 char *s;
3078 int left_cut;
3080 @@ -2058,22 +2184,24 @@
3081 /* Tabification is assumed for multiple columns, also for n-separators,
3082 but `default n-separator = TAB' hasn't been given priority over
3083 equal column_width also specified by POSIX. */
3084 - if (number_separator == '\t')
3085 + if (number_separator[0] == '\t')
3087 i = number_width - chars_per_number;
3088 while (i-- > 0)
3089 (p->char_func) (' ');
3091 else
3092 - (p->char_func) (number_separator);
3093 + for (j = 0; j < number_separator_length; j++)
3094 + (p->char_func) (number_separator[j]);
3096 else
3097 /* To comply with POSIX, we avoid any expansion of default TAB
3098 separator with a single column output. No column_width requirement
3099 has to be considered. */
3101 - (p->char_func) (number_separator);
3102 - if (number_separator == '\t')
3103 + for (j = 0; j < number_separator_length; j++)
3104 + (p->char_func) (number_separator[j]);
3105 + if (number_separator[0] == '\t')
3106 output_position = POS_AFTER_TAB (chars_per_output_tab,
3107 output_position);
3109 @@ -2234,7 +2362,7 @@
3110 while (goal - h_old > 1
3111 && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
3113 - putchar (output_tab_char);
3114 + fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
3115 h_old = h_new;
3117 while (++h_old <= goal)
3118 @@ -2254,6 +2382,7 @@
3120 char *s;
3121 int l = col_sep_length;
3122 + int not_space_flag;
3124 s = col_sep_string;
3126 @@ -2267,6 +2396,7 @@
3128 for (; separators_not_printed > 0; --separators_not_printed)
3130 + not_space_flag = 0;
3131 while (l-- > 0)
3133 /* 3 types of sep_strings: spaces only, spaces and chars,
3134 @@ -2280,12 +2410,15 @@
3136 else
3138 + not_space_flag = 1;
3139 if (spaces_not_printed > 0)
3140 print_white_space ();
3141 putchar (*s++);
3142 - ++output_position;
3145 + if (not_space_flag)
3146 + output_position += col_sep_width;
3148 /* sep_string ends with some spaces */
3149 if (spaces_not_printed > 0)
3150 print_white_space ();
3151 @@ -2313,7 +2446,7 @@
3152 required number of tabs and spaces. */
3154 static void
3155 -print_char (char c)
3156 +print_char_single (char c)
3158 if (tabify_output)
3160 @@ -2337,6 +2470,74 @@
3161 putchar (c);
3164 +#ifdef HAVE_MBRTOWC
3165 +static void
3166 +print_char_multi (char c) /* Multibyte-locale counterpart of print_char_single. */
3168 + static size_t mbc_pos = 0; /* Number of bytes currently held in MBC. */
3169 + static char mbc[MB_LEN_MAX] = {'\0'}; /* Bytes of the character being assembled. */
3170 + static mbstate_t state = {'\0'}; /* Conversion state kept across calls. */
3171 + mbstate_t state_bak; /* STATE as it was before this call's mbrtowc. */
3172 + wchar_t wc;
3173 + size_t mblength;
3174 + int width;
3176 + if (tabify_output)
3178 + state_bak = state;
3179 + mbc[mbc_pos++] = c;
3180 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
3182 + while (mbc_pos > 0)
3184 + switch (mblength)
3186 + case (size_t)-2:
3187 + state = state_bak;
3188 + return; /* Incomplete character: keep the bytes and wait for more. */
3190 + case (size_t)-1:
3191 + state = state_bak;
3192 + ++output_position; /* An invalid byte is counted as one column. */
3193 + putchar (mbc[0]);
3194 + memmove (mbc, mbc + 1, MB_CUR_MAX - 1);
3195 + --mbc_pos;
3196 + break;
3198 + case 0:
3199 + mblength = 1; /* A NUL occupies one byte. Fall through. */
3201 + default:
3202 + if (wc == L' ')
3204 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
3205 + --mbc_pos;
3206 + ++spaces_not_printed; /* Count the space instead of printing it now. */
3207 + return;
3209 + else if (spaces_not_printed > 0)
3210 + print_white_space ();
3212 + /* Nonprintables are assumed to have width 0, except L'\b'. */
3213 + if ((width = wcwidth (wc)) < 1)
3215 + if (wc == L'\b')
3216 + --output_position;
3218 + else
3219 + output_position += width;
3221 + fwrite (mbc, sizeof(char), mblength, stdout);
3222 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
3223 + mbc_pos -= mblength;
3226 + return;
3228 + putchar (c); /* Not tabifying: pass the byte through unchanged. */
3230 +#endif
3232 /* Skip to page PAGE before printing.
3233 PAGE may be larger than total number of pages. */
3235 @@ -2517,9 +2718,9 @@
3236 align_empty_cols = false;
3239 - if (padding_not_printed - col_sep_length > 0)
3240 + if (padding_not_printed - col_sep_width > 0)
3242 - pad_across_to (padding_not_printed - col_sep_length);
3243 + pad_across_to (padding_not_printed - col_sep_width);
3244 padding_not_printed = ANYWHERE;
3247 @@ -2620,9 +2821,9 @@
3251 - if (padding_not_printed - col_sep_length > 0)
3252 + if (padding_not_printed - col_sep_width > 0)
3254 - pad_across_to (padding_not_printed - col_sep_length);
3255 + pad_across_to (padding_not_printed - col_sep_width);
3256 padding_not_printed = ANYWHERE;
3259 @@ -2635,8 +2836,8 @@
3260 if (spaces_not_printed == 0)
3262 output_position = p->start_position + end_vector[line];
3263 - if (p->start_position - col_sep_length == chars_per_margin)
3264 - output_position -= col_sep_length;
3265 + if (p->start_position - col_sep_width == chars_per_margin)
3266 + output_position -= col_sep_width;
3269 return true;
3270 @@ -2655,7 +2856,7 @@
3271 number of characters is 1.) */
3273 static int
3274 -char_to_clump (char c)
3275 +char_to_clump_single (char c)
3277 unsigned char uc = c;
3278 char *s = clump_buff;
3279 @@ -2665,10 +2866,10 @@
3280 int chars;
3281 int chars_per_c = 8;
3283 - if (c == input_tab_char)
3284 + if (c == input_tab_char[0])
3285 chars_per_c = chars_per_input_tab;
3287 - if (c == input_tab_char || c == '\t')
3288 + if (c == input_tab_char[0] || c == '\t')
3290 width = TAB_WIDTH (chars_per_c, input_position);
3292 @@ -2739,6 +2940,154 @@
3293 return chars;
3296 +#ifdef HAVE_MBRTOWC
3297 +static int
3298 +char_to_clump_multi (char c)
3300 + static size_t mbc_pos = 0;
3301 + static char mbc[MB_LEN_MAX] = {'\0'};
3302 + static mbstate_t state = {'\0'};
3303 + mbstate_t state_bak;
3304 + wchar_t wc;
3305 + size_t mblength;
3306 + int wc_width;
3307 + register char *s = clump_buff;
3308 + register int i, j;
3309 + char esc_buff[4];
3310 + int width;
3311 + int chars;
3312 + int chars_per_c = 8;
3314 + state_bak = state;
3315 + mbc[mbc_pos++] = c;
3316 + mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
3318 + width = 0;
3319 + chars = 0;
3320 + while (mbc_pos > 0)
3322 + switch (mblength)
3324 + case (size_t)-2:
3325 + state = state_bak;
3326 + return 0;
3328 + case (size_t)-1:
3329 + state = state_bak;
3330 + mblength = 1;
3332 + if (use_esc_sequence || use_cntrl_prefix)
3334 + width = +4;
3335 + chars = +4;
3336 + *s++ = '\\';
3337 + sprintf (esc_buff, "%03o", mbc[0]);
3338 + for (i = 0; i <= 2; ++i)
3339 + *s++ = (int) esc_buff[i];
3341 + else
3343 + width += 1;
3344 + chars += 1;
3345 + *s++ = mbc[0];
3347 + break;
3349 + case 0:
3350 + mblength = 1;
3351 + /* Fall through */
3353 + default:
3354 + if (memcmp (mbc, input_tab_char, mblength) == 0)
3355 + chars_per_c = chars_per_input_tab;
3357 + if (memcmp (mbc, input_tab_char, mblength) == 0 || c == '\t')
3359 + int width_inc;
3361 + width_inc = TAB_WIDTH (chars_per_c, input_position);
3362 + width += width_inc;
3364 + if (untabify_input)
3366 + for (i = width_inc; i; --i)
3367 + *s++ = ' ';
3368 + chars += width_inc;
3370 + else
3372 + for (i = 0; i < mblength; i++)
3373 + *s++ = mbc[i];
3374 + chars += mblength;
3377 + else if ((wc_width = wcwidth (wc)) < 1)
3379 + if (use_esc_sequence)
3381 + for (i = 0; i < mblength; i++)
3383 + width += 4;
3384 + chars += 4;
3385 + *s++ = '\\';
3386 + sprintf (esc_buff, "%03o", c);
3387 + for (j = 0; j <= 2; ++j)
3388 + *s++ = (int) esc_buff[j];
3391 + else if (use_cntrl_prefix)
3393 + if (wc < 0200)
3395 + width += 2;
3396 + chars += 2;
3397 + *s++ = '^';
3398 + *s++ = wc ^ 0100;
3400 + else
3402 + for (i = 0; i < mblength; i++)
3404 + width += 4;
3405 + chars += 4;
3406 + *s++ = '\\';
3407 + sprintf (esc_buff, "%03o", c);
3408 + for (j = 0; j <= 2; ++j)
3409 + *s++ = (int) esc_buff[j];
3413 + else if (wc == L'\b')
3415 + width += -1;
3416 + chars += 1;
3417 + *s++ = c;
3419 + else
3421 + width += 0;
3422 + chars += mblength;
3423 + for (i = 0; i < mblength; i++)
3424 + *s++ = mbc[i];
3427 + else
3429 + width += wc_width;
3430 + chars += mblength;
3431 + for (i = 0; i < mblength; i++)
3432 + *s++ = mbc[i];
3435 + memmove (mbc, mbc + mblength, MB_CUR_MAX - mblength);
3436 + mbc_pos -= mblength;
3439 + input_position += width;
3440 + return chars;
3442 +#endif
3444 /* We've just printed some files and need to clean up things before
3445 looking for more options and printing the next batch of files.
3447 --- coreutils-6.8+/src/cut.c.i18n 2007-01-14 15:41:28.000000000 +0000
3448 +++ coreutils-6.8+/src/cut.c 2007-03-01 15:08:24.000000000 +0000
3449 @@ -29,6 +29,11 @@
3450 #include <assert.h>
3451 #include <getopt.h>
3452 #include <sys/types.h>
3454 +/* Get mbstate_t, mbrtowc(). */
3455 +#if HAVE_WCHAR_H
3456 +# include <wchar.h>
3457 +#endif
3458 #include "system.h"
3460 #include "error.h"
3461 @@ -37,6 +42,18 @@
3462 #include "quote.h"
3463 #include "xstrndup.h"
3465 +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
3466 + installation; work around this configuration error. */
3467 +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
3468 +# undef MB_LEN_MAX
3469 +# define MB_LEN_MAX 16
3470 +#endif
3472 +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
3473 +#if HAVE_MBRTOWC && defined mbstate_t
3474 +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
3475 +#endif
3477 /* The official name of this program (e.g., no `g' prefix). */
3478 #define PROGRAM_NAME "cut"
3480 @@ -67,6 +84,52 @@
3482 while (0)
3484 +/* Refill BUF from STREAM when fewer than MB_LEN_MAX unread bytes remain. */
3485 +#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM) \
3486 + do \
3487 + { \
3488 + if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM)) \
3489 + { \
3490 + memmove (BUF, BUFPOS, BUFLEN); /* Move the unread tail to the front. */ \
3491 + BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
3492 + BUFPOS = BUF; \
3493 + } \
3494 + } \
3495 + while (0)
3497 +/* Decode the character at BUFPOS into WC and its byte length into MBLENGTH
3498 + (WC = WEOF if BUFLEN < 1); BUFPOS is not advanced. CONVFAIL is 1 if the conversion fails, else 0. */
3499 +#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
3500 + do \
3501 + { \
3502 + mbstate_t state_bak; \
3504 + if (BUFLEN < 1) \
3505 + { \
3506 + WC = WEOF; /* No bytes left; CONVFAIL and MBLENGTH are left untouched. */ \
3507 + break; \
3508 + } \
3510 + /* Get a wide character. */ \
3511 + CONVFAIL = 0; \
3512 + state_bak = STATE; \
3513 + MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE); \
3515 + switch (MBLENGTH) \
3516 + { \
3517 + case (size_t)-1: \
3518 + case (size_t)-2: \
3519 + CONVFAIL++; \
3520 + STATE = state_bak; \
3521 + /* Fall through. */ \
3523 + case 0: \
3524 + MBLENGTH = 1; /* Failed conversions and NUL are treated as one byte. */ \
3525 + break; \
3526 + } \
3527 + } \
3528 + while (0)
3530 struct range_pair
3532 size_t lo;
3533 @@ -85,7 +148,7 @@
3534 /* The number of bytes allocated for FIELD_1_BUFFER. */
3535 static size_t field_1_bufsize;
3537 -/* The largest field or byte index used as an endpoint of a closed
3538 +/* The largest byte, character or field index used as an endpoint of a closed
3539 or degenerate range specification; this doesn't include the starting
3540 index of right-open-ended ranges. For example, with either range spec
3541 `2-5,9-', `2-3,5,9-' this variable would be set to 5. */
3542 @@ -97,10 +160,11 @@
3544 /* This is a bit vector.
3545 In byte mode, which bytes to output.
3546 + In character mode, which characters to output.
3547 In field mode, which DELIM-separated fields to output.
3548 - Both bytes and fields are numbered starting with 1,
3549 + Bytes, characters and fields are numbered starting with 1,
3550 so the zeroth bit of this array is unused.
3551 - A field or byte K has been selected if
3552 + A byte, character or field K has been selected if
3553 (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
3554 || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
3555 static unsigned char *printable_field;
3556 @@ -109,9 +173,12 @@
3558 undefined_mode,
3560 - /* Output characters that are in the given bytes. */
3561 + /* Output bytes that are at the given positions. */
3562 byte_mode,
3564 + /* Output characters that are at the given positions. */
3565 + character_mode,
3567 /* Output the given delimeter-separated fields. */
3568 field_mode
3570 @@ -121,6 +188,13 @@
3572 static enum operating_mode operating_mode;
3574 +/* If nonzero, when in byte mode, don't split multibyte characters. */
3575 +static int byte_mode_character_aware;
3577 +/* If nonzero, the functions for single byte locale are used
3578 + even if this program runs on multibyte locale. */
3579 +static int force_singlebyte_mode;
3581 /* If true do not output lines containing no delimeter characters.
3582 Otherwise, all such lines are printed. This option is valid only
3583 with field mode. */
3584 @@ -132,6 +206,9 @@
3586 /* The delimeter character for field mode. */
3587 static unsigned char delim;
3588 +#if HAVE_WCHAR_H
3589 +static wchar_t wcdelim;
3590 +#endif
3592 /* True if the --output-delimiter=STRING option was specified. */
3593 static bool output_delimiter_specified;
3594 @@ -205,7 +282,7 @@
3595 -f, --fields=LIST select only these fields; also print any line\n\
3596 that contains no delimiter character, unless\n\
3597 the -s option is specified\n\
3598 - -n (ignored)\n\
3599 + -n with -b: don't split multibyte characters\n\
3600 "), stdout);
3601 fputs (_("\
3602 --complement complement the set of selected bytes, characters\n\
3603 @@ -362,7 +439,7 @@
3604 in_digits = false;
3605 /* Starting a range. */
3606 if (dash_found)
3607 - FATAL_ERROR (_("invalid byte or field list"));
3608 + FATAL_ERROR (_("invalid byte, character or field list"));
3609 dash_found = true;
3610 fieldstr++;
3612 @@ -387,14 +464,16 @@
3613 if (value == 0)
3615 /* `n-'. From `initial' to end of line. */
3616 - eol_range_start = initial;
3617 + if (eol_range_start == 0 ||
3618 + (eol_range_start != 0 && eol_range_start > initial))
3619 + eol_range_start = initial;
3620 field_found = true;
3622 else
3624 /* `m-n' or `-n' (1-n). */
3625 if (value < initial)
3626 - FATAL_ERROR (_("invalid byte or field list"));
3627 + FATAL_ERROR (_("invalid byte, character or field list"));
3629 /* Is there already a range going to end of line? */
3630 if (eol_range_start != 0)
3631 @@ -467,6 +546,9 @@
3632 if (operating_mode == byte_mode)
3633 error (0, 0,
3634 _("byte offset %s is too large"), quote (bad_num));
3635 + else if (operating_mode == character_mode)
3636 + error (0, 0,
3637 + _("character offset %s is too large"), quote (bad_num));
3638 else
3639 error (0, 0,
3640 _("field number %s is too large"), quote (bad_num));
3641 @@ -477,7 +559,7 @@
3642 fieldstr++;
3644 else
3645 - FATAL_ERROR (_("invalid byte or field list"));
3646 + FATAL_ERROR (_("invalid byte, character or field list"));
3649 max_range_endpoint = 0;
3650 @@ -570,6 +652,63 @@
3654 +#if HAVE_MBRTOWC
3655 +/* This function is used in the following cases.
3657 + 1. Read from the stream STREAM, printing to standard output any selected
3658 + characters.
3660 + 2. Read from the stream STREAM, printing to standard output any selected bytes,
3661 + without splitting multibyte characters. */
3663 +static void
3664 +cut_characters_or_cut_bytes_no_split (FILE *stream)
3666 + int idx; /* Number of bytes or characters in the line so far. */
3667 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
3668 + char *bufpos; /* Next read position of BUF. */
3669 + size_t buflen; /* The length of the byte sequence in buf. */
3670 + wint_t wc; /* The wide character just decoded. */
3671 + size_t mblength; /* The byte size of the multibyte character that
3672 + was decoded into WC. */
3673 + mbstate_t state; /* State of the stream. */
3674 + int convfail; /* 1 when the conversion failed, otherwise 0. */
3676 + idx = 0;
3677 + buflen = 0;
3678 + bufpos = buf;
3679 + memset (&state, '\0', sizeof(mbstate_t));
3681 + while (1)
3683 + REFILL_BUFFER (buf, bufpos, buflen, stream);
3685 + GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
3687 + if (wc == WEOF)
3689 + if (idx > 0) /* Input ended in the middle of a line: terminate it. */
3690 + putchar ('\n');
3691 + break;
3693 + else if (wc == L'\n')
3695 + putchar ('\n');
3696 + idx = 0;
3698 + else
3700 + idx += (operating_mode == byte_mode) ? mblength : 1; /* Count bytes or characters. */
3701 + if (print_kth (idx, NULL))
3702 + fwrite (bufpos, mblength, sizeof(char), stdout);
3705 + buflen -= mblength; /* Consume the character just handled. */
3706 + bufpos += mblength;
3709 +#endif
3711 /* Read from stream STREAM, printing to standard output any selected fields. */
3713 static void
3714 @@ -692,13 +831,192 @@
3718 +#if HAVE_MBRTOWC
3719 +static void
3720 +cut_fields_mb (FILE *stream) /* Multibyte-locale counterpart of cut_fields. */
3722 + int c;
3723 + unsigned int field_idx;
3724 + int found_any_selected_field;
3725 + int buffer_first_field;
3726 + int empty_input;
3727 + char buf[MB_LEN_MAX + BUFSIZ]; /* For spooling a read byte sequence. */
3728 + char *bufpos; /* Next read position of BUF. */
3729 + size_t buflen; /* The length of the byte sequence in buf. */
3730 + wint_t wc = 0; /* The wide character just decoded. */
3731 + size_t mblength; /* The byte size of the multibyte character that
3732 + was decoded into WC. */
3733 + mbstate_t state; /* State of the stream. */
3734 + int convfail; /* 1 when the conversion failed, otherwise 0. */
3736 + found_any_selected_field = 0;
3737 + field_idx = 1;
3738 + bufpos = buf;
3739 + buflen = 0;
3740 + memset (&state, '\0', sizeof(mbstate_t));
3742 + c = getc (stream);
3743 + empty_input = (c == EOF);
3744 + if (c != EOF)
3745 + ungetc (c, stream);
3746 + else
3747 + wc = WEOF;
3749 + /* To support the semantics of the -s flag, we may have to buffer
3750 + all of the first field to determine whether it is `delimited.'
3751 + But that is unnecessary if all non-delimited lines must be printed
3752 + and the first field has been selected, or if non-delimited lines
3753 + must be suppressed and the first field has *not* been selected.
3754 + That is because a non-delimited line has exactly one field. */
3755 + buffer_first_field = (suppress_non_delimited ^ !print_kth (1, NULL));
3757 + while (1)
3759 + if (field_idx == 1 && buffer_first_field)
3761 + int len = 0;
3763 + while (1)
3765 + REFILL_BUFFER (buf, bufpos, buflen, stream);
3767 + GET_NEXT_WC_FROM_BUFFER
3768 + (wc, bufpos, buflen, mblength, state, convfail);
3770 + if (wc == WEOF)
3771 + break;
3773 + field_1_buffer = xrealloc (field_1_buffer, len + mblength);
3774 + memcpy (field_1_buffer + len, bufpos, mblength);
3775 + len += mblength;
3776 + buflen -= mblength;
3777 + bufpos += mblength;
3779 + if (!convfail && (wc == L'\n' || wc == wcdelim))
3780 + break;
3783 + if (len <= 0 && wc == WEOF) /* Bytes buffered at EOF are an unterminated last line. */
3784 + break;
3786 + /* If the first field extends to the end of line (it is not
3787 + delimited) and we are printing all non-delimited lines,
3788 + print this one. */
3789 + if (convfail || (!convfail && wc != wcdelim))
3791 + if (suppress_non_delimited)
3793 + /* Empty. */
3795 + else
3797 + fwrite (field_1_buffer, sizeof (char), len, stdout);
3798 + /* Make sure the output line is newline terminated. */
3799 + if (convfail || (!convfail && wc != L'\n'))
3800 + putchar ('\n');
3802 + continue;
3805 + if (print_kth (1, NULL))
3807 + /* Print the field, but not the trailing delimiter, which is MBLENGTH bytes long. */
3808 + fwrite (field_1_buffer, sizeof (char), len - mblength, stdout);
3809 + found_any_selected_field = 1;
3811 + ++field_idx;
3814 + if (wc != WEOF)
3816 + if (print_kth (field_idx, NULL))
3818 + if (found_any_selected_field)
3820 + fwrite (output_delimiter_string, sizeof (char),
3821 + output_delimiter_length, stdout);
3823 + found_any_selected_field = 1;
3826 + while (1)
3828 + REFILL_BUFFER (buf, bufpos, buflen, stream);
3830 + GET_NEXT_WC_FROM_BUFFER
3831 + (wc, bufpos, buflen, mblength, state, convfail);
3833 + if (wc == WEOF)
3834 + break;
3835 + else if (!convfail && (wc == wcdelim || wc == L'\n'))
3837 + buflen -= mblength; /* Consume the delimiter or newline without printing it. */
3838 + bufpos += mblength;
3839 + break;
3842 + if (print_kth (field_idx, NULL))
3843 + fwrite (bufpos, mblength, sizeof(char), stdout);
3845 + buflen -= mblength;
3846 + bufpos += mblength;
3850 + if ((!convfail || wc == L'\n') && buflen < 1)
3851 + wc = WEOF;
3853 + if (!convfail && wc == wcdelim)
3854 + ++field_idx;
3855 + else if (wc == WEOF || (!convfail && wc == L'\n'))
3857 + if (found_any_selected_field
3858 + || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
3859 + putchar ('\n');
3860 + if (wc == WEOF)
3861 + break;
3862 + field_idx = 1; /* Start a new line. */
3863 + found_any_selected_field = 0;
3867 +#endif
3869 static void
3870 cut_stream (FILE *stream)
3872 - if (operating_mode == byte_mode)
3873 - cut_bytes (stream);
3874 +#if HAVE_MBRTOWC /* Locale-aware dispatch for cut_stream; compiled only when HAVE_MBRTOWC is set. */
3875 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode) /* Multibyte locale, and the -d handling has not forced single-byte mode. */
3877 + switch (operating_mode) /* Pick the cutter that matches the selected list type. */
3879 + case byte_mode:
3880 + if (byte_mode_character_aware) /* Flag is set by the -n option. */
3881 + cut_characters_or_cut_bytes_no_split (stream); /* Name suggests bytes are cut without splitting characters; defined outside this excerpt -- confirm. */
3882 + else
3883 + cut_bytes (stream); /* Plain byte cutting, same routine as the single-byte path below. */
3884 + break;
3886 + case character_mode: /* Mode selected by the -c option. */
3887 + cut_characters_or_cut_bytes_no_split (stream);
3888 + break;
3890 + case field_mode:
3891 + cut_fields_mb (stream); /* Field cutter that compares wide characters against wcdelim. */
3892 + break;
3894 + default: /* main reports a fatal error for undefined_mode, so presumably unreachable. */
3895 + abort ();
3898 else
3899 - cut_fields (stream);
3900 +#endif /* HAVE_MBRTOWC */
3902 + if (operating_mode == field_mode) /* Single-byte path: taken when the multibyte test above fails or is compiled out. */
3903 + cut_fields (stream);
3904 + else
3905 + cut_bytes (stream); /* Every non-field mode, character_mode included, is cut byte by byte here. */
3909 /* Process file FILE to standard output.
3910 @@ -748,6 +1066,8 @@
3911 bool ok;
3912 bool delim_specified = false;
3913 char *spec_list_string IF_LINT(= NULL);
3914 + char mbdelim[MB_LEN_MAX + 1];
3915 + size_t delimlen = 0;
3917 initialize_main (&argc, &argv);
3918 program_name = argv[0];
3919 @@ -770,7 +1090,6 @@
3920 switch (optc)
3922 case 'b':
3923 - case 'c':
3924 /* Build the byte list. */
3925 if (operating_mode != undefined_mode)
3926 FATAL_ERROR (_("only one type of list may be specified"));
3927 @@ -778,6 +1097,14 @@
3928 spec_list_string = optarg;
3929 break;
3931 + case 'c':
3932 + /* Build the character list. */
3933 + if (operating_mode != undefined_mode)
3934 + FATAL_ERROR (_("only one type of list may be specified"));
3935 + operating_mode = character_mode;
3936 + spec_list_string = optarg;
3937 + break;
3939 case 'f':
3940 /* Build the field list. */
3941 if (operating_mode != undefined_mode)
3942 @@ -789,10 +1116,35 @@
3943 case 'd':
3944 /* New delimiter. */
3945 /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
3946 - if (optarg[0] != '\0' && optarg[1] != '\0')
3947 - FATAL_ERROR (_("the delimiter must be a single character"));
3948 - delim = optarg[0];
3949 - delim_specified = true;
3950 +#if HAVE_MBRTOWC
3952 + if(MB_CUR_MAX > 1)
3954 + mbstate_t state;
3956 + memset (&state, '\0', sizeof(mbstate_t));
3957 + delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
3959 + if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
3960 + ++force_singlebyte_mode;
3961 + else
3963 + delimlen = (delimlen < 1) ? 1 : delimlen;
3964 + if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
3965 + FATAL_ERROR (_("the delimiter must be a single character"));
3966 + memcpy (mbdelim, optarg, delimlen);
3970 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
3971 +#endif
3973 + if (optarg[0] != '\0' && optarg[1] != '\0')
3974 + FATAL_ERROR (_("the delimiter must be a single character"));
3975 + delim = (unsigned char) optarg[0];
3977 + delim_specified = true;
3979 break;
3981 case OUTPUT_DELIMITER_OPTION:
3982 @@ -805,6 +1157,7 @@
3983 break;
3985 case 'n':
3986 + byte_mode_character_aware = 1;
3987 break;
3989 case 's':
3990 @@ -827,7 +1180,7 @@
3991 if (operating_mode == undefined_mode)
3992 FATAL_ERROR (_("you must specify a list of bytes, characters, or fields"));
3994 - if (delim != '\0' && operating_mode != field_mode)
3995 + if (delim_specified && operating_mode != field_mode)
3996 FATAL_ERROR (_("an input delimiter may be specified only\
3997 when operating on fields"));
3999 @@ -854,15 +1207,34 @@
4002 if (!delim_specified)
4003 - delim = '\t';
4005 + delim = '\t';
4006 +#ifdef HAVE_MBRTOWC
4007 + wcdelim = L'\t';
4008 + mbdelim[0] = '\t';
4009 + mbdelim[1] = '\0';
4010 + delimlen = 1;
4011 +#endif
4014 if (output_delimiter_string == NULL)
4016 - static char dummy[2];
4017 - dummy[0] = delim;
4018 - dummy[1] = '\0';
4019 - output_delimiter_string = dummy;
4020 - output_delimiter_length = 1;
4021 +#ifdef HAVE_MBRTOWC
4022 + if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
4024 + output_delimiter_string = xstrdup(mbdelim);
4025 + output_delimiter_length = delimlen;
4028 + if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
4029 +#endif
4031 + static char dummy[2];
4032 + dummy[0] = delim;
4033 + dummy[1] = '\0';
4034 + output_delimiter_string = dummy;
4035 + output_delimiter_length = 1;
4039 if (optind == argc)