/* UTF-8 to multibyte conversion.
   Copyright (C) 2022-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
/* This is the private state used if PS is NULL.  As an object with
   static storage duration and no initializer it starts out
   zero-initialized, so the top bit of its __count member is clear on
   first use.  */
static mbstate_t state;
28 c8rtomb (char *s
, char8_t c8
, mbstate_t *ps
)
/* This implementation depends on the converter invoked by wcrtomb not
   needing to retain state in either the topmost bit of ps->__count or
   in ps->__value between invocations.  This implementation uses the
   topmost bit of ps->__count to indicate that trailing code units are
   expected and uses ps->__value to store previously seen code units.  */
/* If 's' is a null pointer, behave as if u8'\0' was passed as 'c8'.  If
   this occurs for an incomplete code unit sequence, then an error will
   be reported, because a null code unit is not a valid continuation
   code unit.  */
49 if (! (ps
->__count
& 0x80000000))
52 if ((c8
>= 0x80 && c8
<= 0xC1) || c8
>= 0xF5)
54 /* An invalid lead code unit. */
60 /* A valid lead code unit. */
61 ps
->__count
|= 0x80000000;
62 ps
->__value
.__wchb
[0] = c8
;
63 ps
->__value
.__wchb
[3] = 1;
66 /* A single byte (ASCII) code unit. */
71 char8_t cu1
= ps
->__value
.__wchb
[0];
72 if (ps
->__value
.__wchb
[3] == 1)
74 /* A single lead code unit was previously seen. */
75 if ((c8
< 0x80 || c8
> 0xBF)
76 || (cu1
== 0xE0 && c8
< 0xA0)
77 || (cu1
== 0xED && c8
> 0x9F)
78 || (cu1
== 0xF0 && c8
< 0x90)
79 || (cu1
== 0xF4 && c8
> 0x8F))
81 /* An invalid second code unit. */
87 /* A three or four code unit sequence. */
88 ps
->__value
.__wchb
[1] = c8
;
89 ++ps
->__value
.__wchb
[3];
92 wc
= ((cu1
& 0x1F) << 6)
97 char8_t cu2
= ps
->__value
.__wchb
[1];
98 /* A three or four byte code unit sequence. */
99 if (c8
< 0x80 || c8
> 0xBF)
101 /* An invalid third or fourth code unit. */
102 __set_errno (EILSEQ
);
105 if (ps
->__value
.__wchb
[3] == 2 && cu1
>= 0xF0)
107 /* A four code unit sequence. */
108 ps
->__value
.__wchb
[2] = c8
;
109 ++ps
->__value
.__wchb
[3];
114 wc
= ((cu1
& 0x0F) << 12)
115 + ((cu2
& 0x3F) << 6)
120 char8_t cu3
= ps
->__value
.__wchb
[2];
121 wc
= ((cu1
& 0x07) << 18)
122 + ((cu2
& 0x3F) << 12)
123 + ((cu3
& 0x3F) << 6)
127 ps
->__count
&= 0x7fffffff;
128 ps
->__value
.__wch
= 0;
131 return wcrtomb (s
, wc
, ps
);