1 /* Line breaking of UTF-8 strings.
2 Copyright (C) 2001-2003, 2006-2024 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2001.
5 This file is free software.
6 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7 You can redistribute it and/or modify it under either
8 - the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation, either version 3, or (at your
10 option) any later version, or
11 - the terms of the GNU General Public License as published by the
12 Free Software Foundation; either version 2, or (at your option)
13 any later version, or
14 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
16 This file is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License and the GNU General Public License
20 for more details.
22 You should have received a copy of the GNU Lesser General Public
23 License and of the GNU General Public License along with this
24 program. If not, see <https://www.gnu.org/licenses/>. */
30 #include "unilbrk/internal.h"
32 #include "unilbrk/lbrktables.h"
37 u8_width_linebreaks_internal (const uint8_t *s
, size_t n
,
38 int width
, int start_column
, int at_end_columns
,
39 const char *o
, const char *encoding
, int cr
,
47 u8_possible_linebreaks_loop (s
, n
, encoding
, cr
, p
);
51 last_column
= start_column
;
56 int count
= u8_mbtouc_unsafe (&uc
, s
, s_end
- s
);
58 /* Respect the override. */
59 if (o
!= NULL
&& *o
!= UC_BREAK_UNDEFINED
)
62 if (*p
== UC_BREAK_POSSIBLE
63 || *p
== UC_BREAK_MANDATORY
|| *p
== UC_BREAK_CR_BEFORE_LF
)
65 /* An atomic piece of text ends here. */
66 if (last_p
!= NULL
&& last_column
+ piece_width
> width
)
68 /* Insert a line break. */
69 *last_p
= UC_BREAK_POSSIBLE
;
74 if (*p
== UC_BREAK_MANDATORY
|| *p
== UC_BREAK_CR_BEFORE_LF
)
76 /* uc is a line break character. */
77 /* Start a new piece at column 0. */
84 /* uc is not a line break character. */
87 if (*p
== UC_BREAK_POSSIBLE
)
89 /* Start a new piece. */
91 last_column
+= piece_width
;
93 /* No line break for the moment, may be turned into
94 UC_BREAK_POSSIBLE later, via last_p. */
97 *p
= UC_BREAK_PROHIBITED
;
99 w
= uc_width (uc
, encoding
);
100 if (w
>= 0) /* ignore control characters in the string */
110 /* The last atomic piece of text ends here. */
111 if (last_p
!= NULL
&& last_column
+ piece_width
+ at_end_columns
> width
)
113 /* Insert a line break. */
114 *last_p
= UC_BREAK_POSSIBLE
;
118 return last_column
+ piece_width
;
121 #if defined IN_LIBUNISTRING
122 /* For backward compatibility with older versions of libunistring. */
124 # undef u8_width_linebreaks
127 u8_width_linebreaks (const uint8_t *s
, size_t n
,
128 int width
, int start_column
, int at_end_columns
,
129 const char *o
, const char *encoding
,
132 return u8_width_linebreaks_internal (s
, n
,
133 width
, start_column
, at_end_columns
,
140 u8_width_linebreaks_v2 (const uint8_t *s
, size_t n
,
141 int width
, int start_column
, int at_end_columns
,
142 const char *o
, const char *encoding
,
145 return u8_width_linebreaks_internal (s
, n
,
146 width
, start_column
, at_end_columns
,
147 o
, encoding
, LBP_CR
, p
);
157 /* Read the contents of an input stream, and return it, terminated with a NUL
158 byte. */
160 read_file (FILE *stream
)
168 while (! feof (stream
))
170 if (size
+ BUFSIZE
> alloc
)
172 alloc
= alloc
+ alloc
/ 2;
173 if (alloc
< size
+ BUFSIZE
)
174 alloc
= size
+ BUFSIZE
;
175 buf
= realloc (buf
, alloc
);
178 fprintf (stderr
, "out of memory\n");
182 count
= fread (buf
+ size
, 1, BUFSIZE
, stream
);
194 buf
= realloc (buf
, size
+ 1);
197 fprintf (stderr
, "out of memory\n");
206 main (int argc
, char * argv
[])
210 /* Insert line breaks for a given width. */
211 int width
= atoi (argv
[1]);
212 char *input
= read_file (stdin
);
213 int length
= strlen (input
);
214 char *breaks
= malloc (length
);
217 u8_width_linebreaks_v2 ((uint8_t *) input
, length
, width
, 0, 0, NULL
, "UTF-8", breaks
);
219 for (i
= 0; i
< length
; i
++)
223 case UC_BREAK_POSSIBLE
:
226 case UC_BREAK_MANDATORY
:
228 case UC_BREAK_CR_BEFORE_LF
:
230 case UC_BREAK_PROHIBITED
:
235 putc (input
[i
], stdout
);