1 /* Multibyte to UTF-8 conversion.
2 Copyright (C) 2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
24 #include <wcsmbsload.h>
29 # define EILSEQ EINVAL
33 /* This is the private state used if PS is NULL.  As a file-scope
   static object it starts out zero-initialized and persists across
   calls.  NOTE(review): presumably all callers that pass a null PS
   share this one object -- confirm against the rest of the function.  */
34 static mbstate_t state
;
37 mbrtoc8 (char8_t
*pc8
, const char *s
, size_t n
, mbstate_t *ps
)
39 /* This implementation depends on the converter invoked by mbrtowc not
40 needing to retain state in either the topmost bit of ps->__count or
41 in ps->__value between invocations. This implementation uses the
42 topmost bit of ps->__count to indicate that trailing code units are
43 yet to be written and uses ps->__value to store those code units. */
48 /* If state indicates that trailing code units are yet to be written, write
49 those first regardless of whether 's' is a null pointer. */
50 if (ps
->__count
& 0x80000000)
52 /* ps->__value.__wchb[3] stores the index of the next code unit to
53 write. Code units are stored in reverse order. */
54 size_t i
= ps
->__value
.__wchb
[3];
57 *pc8
= ps
->__value
.__wchb
[i
];
61 ps
->__count
&= 0x7fffffff;
62 ps
->__value
.__wch
= 0;
65 --ps
->__value
.__wchb
[3];
71 /* If 's' is a null pointer, behave as if a null pointer was passed for
72 'pc8', an empty string was passed for 's', and 1 was passed for 'n'. */
81 result
= mbrtowc (&wc
, s
, n
, ps
);
92 *pc8
= 0xC0 + ((wc
>> 6) & 0x1F);
93 ps
->__value
.__wchb
[0] = 0x80 + (wc
& 0x3F);
94 ps
->__value
.__wchb
[3] = 0;
95 ps
->__count
|= 0x80000000;
97 else if (wc
<= 0xFFFF)
100 *pc8
= 0xE0 + ((wc
>> 12) & 0x0F);
101 ps
->__value
.__wchb
[1] = 0x80 + ((wc
>> 6) & 0x3F);
102 ps
->__value
.__wchb
[0] = 0x80 + (wc
& 0x3F);
103 ps
->__value
.__wchb
[3] = 1;
104 ps
->__count
|= 0x80000000;
106 else if (wc
<= 0x10FFFF)
109 *pc8
= 0xF0 + ((wc
>> 18) & 0x07);
110 ps
->__value
.__wchb
[2] = 0x80 + ((wc
>> 12) & 0x3F);
111 ps
->__value
.__wchb
[1] = 0x80 + ((wc
>> 6) & 0x3F);
112 ps
->__value
.__wchb
[0] = 0x80 + (wc
& 0x3F);
113 ps
->__value
.__wchb
[3] = 2;
114 ps
->__count
|= 0x80000000;
117 if (result
== 0 && wc
!= 0)
119 /* mbrtowc() never returns -3. When a MB sequence converts to multiple
120 WCs, no input is consumed when writing the subsequent WCs resulting
121 in a result of 0 even if a null character wasn't written. */