1 /* Multibyte to UTF-8 conversion.
2 Copyright (C) 2022-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
24 #include <wcsmbsload.h>
27 # define EILSEQ EINVAL
31 /* This is the private state used if PS is NULL.  Having static storage
   duration, the object is zero-initialized, and a zero-valued mbstate_t
   describes the initial conversion state.
   NOTE(review): this is a single object shared by every caller that
   passes a null PS -- confirm that callers needing thread safety supply
   their own state object.  */
32 static mbstate_t state
;
35 mbrtoc8 (char8_t
*pc8
, const char *s
, size_t n
, mbstate_t *ps
)
37 /* This implementation depends on the converter invoked by mbrtowc not
38 needing to retain state in either the topmost bit of ps->__count or
39 in ps->__value between invocations. This implementation uses the
40 topmost bit of ps->__count to indicate that trailing code units are
41 yet to be written and uses ps->__value to store those code units. */
46 /* If state indicates that trailing code units are yet to be written, write
47 those first regardless of whether 's' is a null pointer. */
48 if (ps
->__count
& 0x80000000)
50 /* ps->__value.__wchb[3] stores the index of the next code unit to
51 write. Code units are stored in reverse order. */
52 size_t i
= ps
->__value
.__wchb
[3];
55 *pc8
= ps
->__value
.__wchb
[i
];
59 ps
->__count
&= 0x7fffffff;
60 ps
->__value
.__wch
= 0;
63 --ps
->__value
.__wchb
[3];
69 /* If 's' is a null pointer, behave as if a null pointer was passed for
70 'pc8', an empty string was passed for 's', and 1 was passed for 'n'. */
79 result
= mbrtowc (&wc
, s
, n
, ps
);
90 *pc8
= 0xC0 + ((wc
>> 6) & 0x1F);
91 ps
->__value
.__wchb
[0] = 0x80 + (wc
& 0x3F);
92 ps
->__value
.__wchb
[3] = 0;
93 ps
->__count
|= 0x80000000;
95 else if (wc
<= 0xFFFF)
98 *pc8
= 0xE0 + ((wc
>> 12) & 0x0F);
99 ps
->__value
.__wchb
[1] = 0x80 + ((wc
>> 6) & 0x3F);
100 ps
->__value
.__wchb
[0] = 0x80 + (wc
& 0x3F);
101 ps
->__value
.__wchb
[3] = 1;
102 ps
->__count
|= 0x80000000;
104 else if (wc
<= 0x10FFFF)
107 *pc8
= 0xF0 + ((wc
>> 18) & 0x07);
108 ps
->__value
.__wchb
[2] = 0x80 + ((wc
>> 12) & 0x3F);
109 ps
->__value
.__wchb
[1] = 0x80 + ((wc
>> 6) & 0x3F);
110 ps
->__value
.__wchb
[0] = 0x80 + (wc
& 0x3F);
111 ps
->__value
.__wchb
[3] = 2;
112 ps
->__count
|= 0x80000000;
115 if (result
== 0 && wc
!= 0)
117 /* mbrtowc() never returns -3. When a multibyte sequence converts to
118 multiple wide characters, no input is consumed while the subsequent wide
119 characters are written, so the result is 0 even if a null character wasn't written. */