1 /* Word breaks in UTF-8 strings.
2 Copyright (C) 2009-2020 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2009.
5 This program is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Lesser General Public License as published
7 by the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
26 #include "uniwbrk/wbrktable.h"
/* The two macros below are defined immediately before including
   "u-wordbreaks.h"; presumably that header is a shared template that
   expands FUNC and U_MBTOUC_UNSAFE into the UTF-8 implementation -- its
   contents are not visible here, so confirm against the header.  */
28 #define FUNC u8_wordbreaks
30 #define U_MBTOUC_UNSAFE u8_mbtouc_unsafe
31 #include "u-wordbreaks.h"
39 /* Read the contents of an input stream, and return it, terminated with a NUL
   byte.  */
/* read_file: gathers data read from STREAM into the heap buffer `buf`.
   `alloc` is the size passed to realloc; `size` appears to be the number of
   bytes stored so far (fread writes at buf + size).
   NOTE(review): this excerpt omits lines (return type, braces, declarations,
   several conditions and the return), so only the visible statements are
   described below.  */
42 read_file (FILE *stream
)
/* Loop until the end-of-file indicator is set on STREAM.  */
50 while (! feof (stream
))
/* Make sure there is room for one more read of up to BUFSIZE bytes.  */
52 if (size
+ BUFSIZE
> alloc
)
/* Grow the requested capacity by half of its current value ...  */
54 alloc
= alloc
+ alloc
/ 2;
/* ... and raise it to size + BUFSIZE when growing by half is not enough.  */
55 if (alloc
< size
+ BUFSIZE
)
56 alloc
= size
+ BUFSIZE
;
/* Resize the buffer to the new capacity.  */
57 buf
= realloc (buf
, alloc
);
/* Report the failure on stderr; presumably guarded by a NULL check on `buf`
   that is not visible in this excerpt -- confirm.  */
60 fprintf (stderr
, "out of memory\n");
/* Read up to BUFSIZE one-byte items, placing them at offset `size` in the
   buffer.  */
64 count
= fread (buf
+ size
, 1, BUFSIZE
, stream
);
/* Resize the buffer to size + 1 bytes; presumably the extra byte holds the NUL
   terminator -- the store itself is not visible in this excerpt.  */
76 buf
= realloc (buf
, size
+ 1);
/* Same report as above; the guarding condition is likewise not visible.  */
79 fprintf (stderr
, "out of memory\n");
/* main: reads standard input with read_file, passes the text to
   u8_wordbreaks, then writes the input back to stdout byte by byte, with the
   UTF-8 encoding of U+2027 written as a marker.
   NOTE(review): this excerpt omits lines (return type, braces, declarations,
   the tests around the marker, return statements), so only the visible
   statements are described below.  */
88 main (int argc
, char * argv
[])
92 /* Display all the word breaks in the input string. */
93 char *input
= read_file (stdin
);
/* NOTE(review): strlen returns size_t; storing it in an int narrows the
   value -- confirm inputs cannot exceed INT_MAX bytes.  */
94 int length
= strlen (input
);
/* A buffer of `length` bytes, handed to u8_wordbreaks below.
   NOTE(review): no check of the malloc result is visible in this excerpt.  */
95 char *breaks
= malloc (length
);
/* Called with the input bytes viewed as uint8_t, their count, and the
   `breaks` buffer; presumably fills `breaks` with one entry per input byte --
   confirm against the u8_wordbreaks declaration.  */
98 u8_wordbreaks ((uint8_t *) input
, length
, breaks
);
/* Walk the input one byte at a time.  */
100 for (i
= 0; i
< length
; i
++)
/* The three putc calls below write the marker; presumably only where a break
   was recorded for this position -- the guarding test is not visible in this
   excerpt.  */
105 /* U+2027 in UTF-8 encoding */
106 putc (0xe2, stdout
); putc (0x80, stdout
); putc (0xa7, stdout
);
/* Echo the current input byte.  */
113 putc (input
[i
], stdout
);