usleep tests: Avoid failure due to known Cygwin 3.5.3 bug.
[gnulib.git] / lib / unilbrk / u8-width-linebreaks.c
blobf86ce4a941515af2194eba64908f9005fdfbbb07
1 /* Line breaking of UTF-8 strings.
2 Copyright (C) 2001-2003, 2006-2024 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2001.
5 This file is free software.
6 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7 You can redistribute it and/or modify it under either
8 - the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation, either version 3, or (at your
10 option) any later version, or
11 - the terms of the GNU General Public License as published by the
12 Free Software Foundation; either version 2, or (at your option)
13 any later version, or
14 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
16 This file is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License and the GNU General Public License
20 for more details.
22 You should have received a copy of the GNU Lesser General Public
23 License and of the GNU General Public License along with this
24 program. If not, see <https://www.gnu.org/licenses/>. */
26 #include <config.h>
28 /* Specification. */
29 #include "unilbrk.h"
30 #include "unilbrk/internal.h"
32 #include "unilbrk/lbrktables.h"
33 #include "unistr.h"
34 #include "uniwidth.h"
36 int
37 u8_width_linebreaks_internal (const uint8_t *s, size_t n,
38 int width, int start_column, int at_end_columns,
39 const char *o, const char *encoding, int cr,
40 char *p)
42 const uint8_t *s_end;
43 char *last_p;
44 int last_column;
45 int piece_width;
47 u8_possible_linebreaks_loop (s, n, encoding, cr, p);
49 s_end = s + n;
50 last_p = NULL;
51 last_column = start_column;
52 piece_width = 0;
53 while (s < s_end)
55 ucs4_t uc;
56 int count = u8_mbtouc_unsafe (&uc, s, s_end - s);
58 /* Respect the override. */
59 if (o != NULL && *o != UC_BREAK_UNDEFINED)
60 *p = *o;
62 if (*p == UC_BREAK_POSSIBLE
63 || *p == UC_BREAK_MANDATORY || *p == UC_BREAK_CR_BEFORE_LF)
65 /* An atomic piece of text ends here. */
66 if (last_p != NULL && last_column + piece_width > width)
68 /* Insert a line break. */
69 *last_p = UC_BREAK_POSSIBLE;
70 last_column = 0;
74 if (*p == UC_BREAK_MANDATORY || *p == UC_BREAK_CR_BEFORE_LF)
76 /* uc is a line break character. */
77 /* Start a new piece at column 0. */
78 last_p = NULL;
79 last_column = 0;
80 piece_width = 0;
82 else
84 /* uc is not a line break character. */
85 int w;
87 if (*p == UC_BREAK_POSSIBLE)
89 /* Start a new piece. */
90 last_p = p;
91 last_column += piece_width;
92 piece_width = 0;
93 /* No line break for the moment, may be turned into
94 UC_BREAK_POSSIBLE later, via last_p. */
97 *p = UC_BREAK_PROHIBITED;
99 w = uc_width (uc, encoding);
100 if (w >= 0) /* ignore control characters in the string */
101 piece_width += w;
104 s += count;
105 p += count;
106 if (o != NULL)
107 o += count;
110 /* The last atomic piece of text ends here. */
111 if (last_p != NULL && last_column + piece_width + at_end_columns > width)
113 /* Insert a line break. */
114 *last_p = UC_BREAK_POSSIBLE;
115 last_column = 0;
118 return last_column + piece_width;
121 #if defined IN_LIBUNISTRING
122 /* For backward compatibility with older versions of libunistring. */
124 # undef u8_width_linebreaks
/* Entry point kept under the old name.  Forwards all arguments unchanged
   to u8_width_linebreaks_internal, passing -1 as its CR argument.
   NOTE(review): -1 presumably selects the pre-v2 treatment of CR; confirm
   against u8_possible_linebreaks_loop.  */
int
u8_width_linebreaks (const uint8_t *s, size_t n,
                     int width, int start_column, int at_end_columns,
                     const char *o, const char *encoding,
                     char *p)
{
  const int cr = -1;

  return u8_width_linebreaks_internal (s, n, width, start_column,
                                       at_end_columns, o, encoding, cr, p);
}
137 #endif
140 u8_width_linebreaks_v2 (const uint8_t *s, size_t n,
141 int width, int start_column, int at_end_columns,
142 const char *o, const char *encoding,
143 char *p)
145 return u8_width_linebreaks_internal (s, n,
146 width, start_column, at_end_columns,
147 o, encoding, LBP_CR, p);
151 #ifdef TEST
153 #include <stdio.h>
154 #include <stdlib.h>
155 #include <string.h>
/* Read the remaining contents of STREAM and return it in a freshly
   allocated buffer, terminated with a NUL byte.  The caller owns the
   buffer and releases it with free ().
   On a read error or when memory cannot be obtained, print a diagnostic
   to stderr and terminate the program with exit status 1.  */
char *
read_file (FILE *stream)
{
#define BUFSIZE 4096
  char *buf = NULL;
  /* Sizes are tracked as size_t so that large inputs cannot trigger signed
     integer overflow.  Invariant: size <= alloc.  */
  size_t alloc = 0;
  size_t size = 0;
  char *result;

  while (! feof (stream))
    {
      size_t count;

      /* Make sure there is room for one more chunk of BUFSIZE bytes.  */
      if (alloc - size < BUFSIZE)
        {
          size_t wanted;
          char *grown;

          if (size > (size_t) -1 - BUFSIZE)
            {
              /* size + BUFSIZE would wrap around.  */
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          /* Grow by 50%, but at least enough for the next chunk.  If the
             50% growth wraps around, the result is smaller than
             size + BUFSIZE and the minimum below takes over.  */
          wanted = alloc + alloc / 2;
          if (wanted < size + BUFSIZE)
            wanted = size + BUFSIZE;
          grown = realloc (buf, wanted);
          if (grown == NULL)
            {
              fprintf (stderr, "out of memory\n");
              exit (1);
            }
          buf = grown;
          alloc = wanted;
        }

      count = fread (buf + size, 1, BUFSIZE, stream);
      if (count == 0)
        {
          /* Either end of file (the loop condition ends the loop) or a
             read error.  */
          if (ferror (stream))
            {
              perror ("fread");
              exit (1);
            }
        }
      else
        size += count;
    }

  /* Resize to the exact length plus the terminating NUL.  */
  result = realloc (buf, size + 1);
  if (result == NULL)
    {
      fprintf (stderr, "out of memory\n");
      exit (1);
    }
  result[size] = '\0';
  return result;
#undef BUFSIZE
}
206 main (int argc, char * argv[])
208 if (argc == 2)
210 /* Insert line breaks for a given width. */
211 int width = atoi (argv[1]);
212 char *input = read_file (stdin);
213 int length = strlen (input);
214 char *breaks = malloc (length);
215 int i;
217 u8_width_linebreaks_v2 ((uint8_t *) input, length, width, 0, 0, NULL, "UTF-8", breaks);
219 for (i = 0; i < length; i++)
221 switch (breaks[i])
223 case UC_BREAK_POSSIBLE:
224 putc ('\n', stdout);
225 break;
226 case UC_BREAK_MANDATORY:
227 break;
228 case UC_BREAK_CR_BEFORE_LF:
229 break;
230 case UC_BREAK_PROHIBITED:
231 break;
232 default:
233 abort ();
235 putc (input[i], stdout);
238 free (breaks);
240 return 0;
242 else
243 return 1;
246 #endif /* TEST */