timespec_get: New module.
[gnulib.git] / lib / uniwbrk / u8-wordbreaks.c
blobd30d2ea28e7e03d56dc0eacc09635ec811448af9
1 /* Word breaks in UTF-8 strings.
2 Copyright (C) 2009-2021 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2009.
5 This program is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Lesser General Public License as published
7 by the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 #include <config.h>
20 /* Specification. */
21 #include "uniwbrk.h"
23 #include <string.h>
25 #include "unistr.h"
26 #include "uniwbrk/wbrktable.h"
/* Parameters for the generic code in "u-wordbreaks.h": the function name
   to use, the string unit type, and the routine used to decode one
   character from a string of such units.
   NOTE(review): presumably the header expands into the definition of
   u8_wordbreaks using these macros -- confirm against u-wordbreaks.h.  */
28 #define FUNC u8_wordbreaks
29 #define UNIT uint8_t
30 #define U_MBTOUC_UNSAFE u8_mbtouc_unsafe
31 #include "u-wordbreaks.h"
34 #ifdef TEST
36 #include <stdio.h>
37 #include <stdlib.h>
/* Read the contents of an input stream, and return it, terminated with a NUL
   byte.

   STREAM is read until end-of-file.  The returned buffer is allocated with
   realloc() and is owned by the caller, who should free() it.

   On a read error or on memory exhaustion, a message is printed to stderr
   and the program exits with status 1; the function never returns NULL.

   All sizes are kept in size_t so that large inputs cannot overflow a
   signed int.  */
char *
read_file (FILE *stream)
{
  /* Number of bytes requested from the stream per fread() call.  */
  const size_t chunk = 4096;
  char *buf = NULL;
  size_t alloc = 0;
  size_t size = 0;
  char *result;

  while (! feof (stream))
    {
      size_t count;

      /* Make sure there is room for one more full chunk.  */
      if (alloc - size < chunk)
        {
          /* Grow geometrically (by 50%), but at least enough for a chunk.  */
          size_t new_alloc = alloc + alloc / 2;
          char *new_buf;

          if (new_alloc < size + chunk)
            new_alloc = size + chunk;
          /* Use a temporary so that BUF is not lost if realloc fails.  */
          new_buf = realloc (buf, new_alloc);
          if (new_buf == NULL)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          buf = new_buf;
          alloc = new_alloc;
        }

      count = fread (buf + size, 1, chunk, stream);
      if (count == 0)
        {
          /* Nothing was read: either an error, or end-of-file, in which
             case the loop condition terminates the loop.  */
          if (ferror (stream))
            {
              perror ("fread");
              exit (1);
            }
        }
      else
        size += count;
    }

  /* Resize to the exact length plus room for the terminating NUL.  This
     also allocates the buffer when the stream was empty.  */
  result = realloc (buf, size + 1);
  if (result == NULL)
    {
      fprintf (stderr, "out of memory\n");
      exit (1);
    }
  result[size] = '\0';
  return result;
}
/* Test driver.

   When invoked without arguments, reads all of standard input, calls
   u8_wordbreaks on it, and copies the input to standard output, emitting
   the UTF-8 encoding of U+2027 before every byte whose entry in the
   breaks array is 1.  Returns 0 in that case.

   When invoked with any arguments, does nothing and returns 1.

   Note that the input length is determined with strlen, so processing
   stops at the first NUL byte contained in the input.  */
int
main (int argc, char * argv[])
{
  if (argc == 1)
    {
      /* Display all the word breaks in the input string.  */
      char *input = read_file (stdin);
      size_t length = strlen (input);
      /* Request at least one byte, so that a NULL result always means
         allocation failure, even for empty input.  */
      char *breaks = malloc (length > 0 ? length : 1);
      size_t i;

      if (breaks == NULL)
        {
          fprintf (stderr, "out of memory\n");
          exit (1);
        }

      u8_wordbreaks ((uint8_t *) input, length, breaks);

      for (i = 0; i < length; i++)
        {
          switch (breaks[i])
            {
            case 1:
              /* U+2027 in UTF-8 encoding */
              putc (0xe2, stdout); putc (0x80, stdout); putc (0xa7, stdout);
              break;
            case 0:
              break;
            default:
              /* Any other value is unexpected here.  */
              abort ();
            }
          putc (input[i], stdout);
        }

      free (breaks);
      free (input);

      return 0;
    }
  else
    return 1;
}
124 #endif /* TEST */