1 // std::print -*- C++ -*-
3 // Copyright The GNU Toolchain Authors.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
28 #include <system_error>
30 #include <cstdint> // uint32_t
32 #include <ext/stdio_filebuf.h>
33 #include <ext/stdio_sync_filebuf.h>
34 #include <ext/numeric_traits.h>
37 # include <stdio.h> // _fileno
38 # include <io.h> // _get_osfhandle, _open_osfhandle, _write
39 # include <fcntl.h> // _O_APPEND
40 # include <windows.h> // GetLastError, WriteConsoleW
41 #elifdef _GLIBCXX_HAVE_UNISTD_H
42 # include <stdio.h> // fileno
43 # include <unistd.h> // isatty
46 namespace std
_GLIBCXX_VISIBILITY(default)
48 _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Probe whether a raw handle can be queried for a console mode.
// This is Windows-specific code: it uses INVALID_HANDLE_VALUE and
// ::GetConsoleMode.
//
// handle: the raw handle to test, passed as void*.
//
// NOTE(review): the return type, braces and return statements are not
// visible in this excerpt. Callers in this file return its result as a
// pointer and also negate it in a condition, so presumably it yields the
// handle on success and a null pointer otherwise -- confirm against the
// complete file.
54 check_for_console(void* handle
)
// Screen out the two "no handle" values before calling the console API.
56 if (handle
!= nullptr && handle
!= INVALID_HANDLE_VALUE
)
// GetConsoleMode requires an output argument; the value written to it is
// never read, only the success of the call matters.
58 unsigned long mode
; // unused
59 if (::GetConsoleMode(handle
, &mode
))
67 // This returns a void* that is either a Windows console HANDLE
68 // or a FILE* for a POSIX terminal. A null return indicates failure.
// Overload of __open_terminal taking a C stdio stream.
//
// f: the stdio stream to inspect.
//
// The descriptor-based checks below are compiled only when
// _GLIBCXX_USE_STDIO_PURE is not defined.
//
// NOTE(review): the return type, braces, part of the preprocessor
// structure and the fallback return are not visible in this excerpt.
70 __open_terminal(FILE* f
)
72 #ifndef _GLIBCXX_USE_STDIO_PURE
// Windows-style path: obtain the CRT descriptor with _fileno, convert it to
// an OS handle with _get_osfhandle, and return whatever check_for_console
// reports for that handle.
76 if (int fd
= ::_fileno(f
); fd
>= 0)
77 return check_for_console((void*)_get_osfhandle(fd
));
78 #elifdef _GLIBCXX_HAVE_UNISTD_H
// POSIX path: requires a non-negative descriptor that isatty() accepts.
// NOTE(review): the statement guarded by this condition is not visible in
// this excerpt.
79 if (int fd
= ::fileno(f
); fd
>= 0 && ::isatty(fd
))
// Overload of __open_terminal taking a stream buffer.
//
// sb: the stream buffer to inspect; its dynamic type is probed with
//     dynamic_cast against the buffer classes listed below.
//
// NOTE(review): the return type, braces, some preprocessor lines and the
// fallback return are not visible in this excerpt.
88 __open_terminal(std::streambuf
* sb
)
90 #ifndef _GLIBCXX_USE_STDIO_PURE
// Brings stdio_sync_filebuf and stdio_filebuf into scope for the casts below.
91 using namespace __gnu_cxx
;
// Buffers that wrap a FILE*: delegate to the FILE* overload using the
// stream returned by file().
93 if (auto fb
= dynamic_cast<stdio_sync_filebuf
<char>*>(sb
))
94 return __open_terminal(fb
->file());
96 if (auto fb
= dynamic_cast<stdio_filebuf
<char>*>(sb
))
97 return __open_terminal(fb
->file());
// Plain filebuf: only handled when native_handle() is available.
99 #ifdef __glibcxx_fstream_native_handle
// Here native_handle() is passed straight to check_for_console, i.e. it is
// treated as a console-style handle.
101 if (auto fb
= dynamic_cast<filebuf
*>(sb
))
102 return check_for_console(fb
->native_handle());
103 #elifdef _GLIBCXX_HAVE_UNISTD_H
// Here native_handle() is used as an int descriptor. If isatty() accepts it,
// the descriptor is duplicated and a new FILE* opened for writing on the
// duplicate is returned; as the trailing comment says, the caller is then
// responsible for closing that stream.
104 if (auto fb
= dynamic_cast<filebuf
*>(sb
))
105 if (int fd
= fb
->native_handle(); fd
>= 0 && ::isatty(fd
))
106 return ::fdopen(::dup(fd
), "w"); // Caller must call fclose.
109 #endif // ! _GLIBCXX_USE_STDIO_PURE
116 // Validate UTF-8 string, replacing invalid sequences with U+FFFD.
118 // Return true if the input is valid UTF-8, false otherwise.
120 // If sizeof(_CharT) > 1, then transcode a valid string into out,
121 // using either UTF-16 or UTF-32 as determined by sizeof(_CharT).
123 // If sizeof(_CharT) == 1 and the input is valid UTF-8, both s and out will
124 // be unchanged. Otherwise, each invalid sequence in s will be overwritten
125 // with a single 0xFF byte followed by zero or more 0xFE bytes, and then
126 // a valid UTF-8 string will be produced in out (replacing invalid
127 // sequences with U+FFFD).
// Byte-at-a-time UTF-8 validator with optional transcoding.
//
// s:   the input bytes. It is mutable because invalid sequences are marked
//      in place with 0xFF / 0xFE sentinel bytes (see below).
// out: receives the transcoded text when _CharT is wider than one byte, or
//      the repaired UTF-8 text when _CharT is one byte and errors were found.
//
// NOTE(review): this excerpt omits several original lines (return type,
// braces, the main loop header, the assignments to needed / lo_bound /
// hi_bound, the declaration of errors, and the return statements). The
// comments below describe only what the visible lines do.
128 template<typename _CharT
>
130 to_valid_unicode(span
<char> s
, basic_string
<_CharT
>& out
)
// Compile-time switch: true when the output character type is wider than
// one byte. Every "if constexpr (transcode)" below depends on it.
132 constexpr bool transcode
= sizeof(_CharT
) > 1;
// seen and needed are compared for equality once a continuation byte has
// been accepted, and seen is also used to find the start of a bad sequence
// (q - seen - 1). Presumably: continuation bytes consumed so far vs. bytes
// the current lead byte requires -- the assignments are not visible here.
134 unsigned seen
= 0, needed
= 0;
// Inclusive range the next continuation byte must fall in (0x80..0xBF
// initially).
135 unsigned char lo_bound
= 0x80, hi_bound
= 0xBF;
// Accumulates the decoded value; it is only written in transcode mode, hence
// [[maybe_unused]].
138 [[maybe_unused
]] uint32_t code_point
{};
139 if constexpr (transcode
)
142 // XXX: count code points in s instead of bytes?
// Reserve one output element per input byte.
143 out
.reserve(s
.size());
// q is the read cursor into s; eoq is one past the last input byte.
146 auto q
= s
.data(), eoq
= q
+ s
.size();
149 unsigned char byte
= *q
;
// ASCII byte: emitted unchanged (converted to _CharT) when transcoding.
152 if (byte
<= 0x7F) [[likely
]] // 0x00 to 0x7F
154 if constexpr (transcode
)
155 out
.push_back(_CharT(byte
));
157 // Fast forward to the next non-ASCII character.
158 while (++q
!= eoq
&& (unsigned char)*q
<= 0x7F)
160 if constexpr (transcode
)
// Bytes 0x80..0xC1 reach this branch; in transcode mode U+FFFD is emitted
// for them.
165 else if (byte
< 0xC2)
167 if constexpr (transcode
)
168 out
.push_back(0xFFFD);
// Lead byte 0xC2..0xDF: its low five bits start the code point.
173 else if (byte
<= 0xDF) // 0xC2 to 0xDF
176 if constexpr (transcode
)
177 code_point
= byte
& 0x1F;
// Lead byte 0xE0..0xEF: its low four bits start the code point. 0xED is
// singled out below; what is done for it is not visible in this excerpt.
179 else if (byte
<= 0xEF) // 0xE0 to 0xEF
183 else if (byte
== 0xED)
187 if constexpr (transcode
)
188 code_point
= byte
& 0x0F;
// Lead byte 0xF0..0xF4: its low three bits start the code point. 0xF4 is
// singled out below; what is done for it is not visible in this excerpt.
190 else if (byte
<= 0xF4) // 0xF0 to 0xF4
194 else if (byte
== 0xF4)
198 if constexpr (transcode
)
199 code_point
= byte
& 0x07;
// NOTE(review): the branch header for the next statement is not visible; by
// position it appears to be the fallthrough for bytes above 0xF4. In
// transcode mode U+FFFD is emitted.
203 if constexpr (transcode
)
204 out
.push_back(0xFFFD);
// A byte outside [lo_bound, hi_bound] is rejected as a continuation byte.
212 if (byte
< lo_bound
|| byte
> hi_bound
) [[unlikely
]]
214 if constexpr (transcode
)
215 out
.push_back(0xFFFD);
// In-place marking of the broken sequence: the byte at q - seen - 1 becomes
// 0xFF and the seen bytes that follow it become 0xFE. This matches the
// sentinel scheme the final loop of this function decodes.
218 *(q
- seen
- 1) = 0xFF;
219 __builtin_memset(q
- seen
, 0xFE, seen
);
// q is not advanced here, so the offending byte is examined again.
225 continue; // Reprocess the current character.
// Accepted continuation byte: append its low six bits to the code point.
228 if constexpr (transcode
)
229 code_point
= (code_point
<< 6) | (byte
& 0x3f);
// The multibyte sequence is complete once seen equals needed.
234 if (seen
== needed
) [[likely
]]
236 if constexpr (transcode
)
// A code point that fits in _CharT is stored directly as one element.
238 if (code_point
<= __gnu_cxx::__int_traits
<_CharT
>::__max
)
239 out
.push_back(code_point
);
// Otherwise lead and trail surrogate values are computed using the formula
// from the Unicode FAQ linked below.
// NOTE(review): only the push of trail is visible in this excerpt.
243 // http://www.unicode.org/faq/utf_bom.html#utf16-4
244 const char32_t LEAD_OFFSET
= 0xD800 - (0x10000 >> 10);
245 char16_t lead
= LEAD_OFFSET
+ (code_point
>> 10);
246 char16_t trail
= 0xDC00 + (code_point
& 0x3FF);
248 out
.push_back(trail
);
// needed still being non-zero at this point is handled as a truncated
// sequence at the end of the input.
257 if (needed
) [[unlikely
]]
259 // The string ends with an incomplete multibyte sequence.
260 if constexpr (transcode
)
261 out
.push_back(0xFFFD);
264 // Truncate the incomplete sequence to a single byte.
// Drops the last seen bytes from the view s; the underlying storage is not
// resized.
266 s
= s
.first(s
.size() - seen
);
// NOTE(review): the declaration and updates of errors, and the statement
// taken when it is zero, are not visible in this excerpt.
272 if (errors
== 0) [[likely
]]
// One-byte output with errors: build the repaired UTF-8 text in out.
274 else if constexpr (!transcode
)
// Reserves two extra bytes per error on top of the input size.
276 out
.reserve(s
.size() + errors
* 2);
277 for (unsigned char byte
: s
)
// Bytes below 0xFE take the likely branch (its statement is not visible
// here). A 0xFF sentinel is replaced by the three-byte encoding of U+FFFD.
// A 0xFE sentinel matches neither visible condition.
279 if (byte
< 0xFE) [[likely
]]
281 else if (byte
== 0xFF)
282 out
+= "\xef\xbf\xbd"; // U+FFFD in UTF-8
288 // Validate UTF-8 string.
289 // Returns true if s is valid UTF-8, otherwise returns false and stores
290 // a valid UTF-8 string in err.
// Thin forwarding wrapper around to_valid_unicode.
//
// s:   the input bytes, passed through unchanged.
// err: a std::string, so the callee runs with a one-byte output character
//      type (its sizeof(_CharT) == 1 mode).
//
// Returns whatever to_valid_unicode returns.
// NOTE(review): the return type and braces are not visible in this excerpt.
291 [[__gnu__::__always_inline__
]]
293 to_valid_utf8(span
<char> s
, string
& err
)
295 return to_valid_unicode(s
, err
);
298 // Transcode UTF-8 string to UTF-16.
299 // Returns true if s is valid UTF-8, otherwise returns false.
300 // In either case, a valid UTF-16 string is stored in u16.
// Thin forwarding wrapper around to_valid_unicode.
//
// s:   the input bytes, passed through unchanged.
// u16: a std::u16string, so the callee runs with a two-byte output character
//      type (its transcoding mode, sizeof(_CharT) > 1).
//
// Returns whatever to_valid_unicode returns.
// NOTE(review): the return type and braces are not visible in this excerpt.
301 [[__gnu__::__always_inline__
]]
303 to_valid_utf16(span
<char> s
, u16string
& u16
)
305 return to_valid_unicode(s
, u16
);
309 // Write a UTF-8 string to a file descriptor/handle.
310 // Ill-formed sequences in the string will be substituted with U+FFFD.
// Write str to the terminal identified by term and report the outcome as an
// error code.
//
// term: opaque terminal object. The visible code uses it as a console handle
//       (check_for_console, WriteConsoleW, _open_osfhandle) in one branch
//       and casts it to FILE* in the other.
// str:  the bytes to write; it is first passed through to_valid_utf16 or
//       to_valid_utf8 depending on the branch.
//
// NOTE(review): the return type, braces, the declarations of ec / wstr / out,
// some preprocessor lines and the final return are not visible in this
// excerpt.
312 __write_to_terminal(void* term
, span
<char> str
)
// A null terminal is rejected up front.
314 if (term
== nullptr) [[unlikely
]]
315 return std::make_error_code(std::errc::invalid_argument
);
320 // We could use std::wstring here instead of std::u16string. In general
321 // char_traits<wchar_t> is more optimized than char_traits<char16_t> but
322 // for the purposes of to_valid_unicode only char_traits::copy matters,
323 // and char_traits<char16_t>::copy uses memcpy so is OK.
// Transcode to UTF-16 into wstr. Ill-formed input is recorded in ec; this is
// an assignment, not a return, so the write below is still attempted.
325 if (!to_valid_utf16(str
, wstr
))
326 ec
= std::make_error_code(errc::illegal_byte_sequence
);
328 // This allows us to test this function with a normal file,
329 // see testsuite/27_io/print/2.cc
// Handle rejected by check_for_console: wrap it in a CRT descriptor opened
// with _O_APPEND and write the UTF-16 data as raw bytes (size() * 2, two
// bytes per char16_t). A failed _write is reported from errno.
330 if (!check_for_console(term
))
332 int fd
= _open_osfhandle((intptr_t)term
, _O_APPEND
);
333 if (_write(fd
, wstr
.data(), wstr
.size() * 2) == -1)
334 ec
= {errno
, generic_category()};
// Console path: the return value of WriteConsoleW is not tested; success is
// judged only by whether the reported count of written characters equals
// wstr.size(). A mismatch returns GetLastError() as a system_category error.
338 unsigned long nchars
= 0;
339 WriteConsoleW(term
, wstr
.data(), wstr
.size(), &nchars
, nullptr);
340 if (nchars
!= wstr
.size())
341 return {(int)GetLastError(), system_category()};
342 #elifdef _GLIBCXX_HAVE_UNISTD_H
// POSIX path: validate as UTF-8; ill-formed input is recorded in ec.
// NOTE(review): the lines between this check and the fwrite are not visible,
// so how the repaired text in out reaches the write cannot be confirmed here.
344 if (!to_valid_utf8(str
, out
))
347 ec
= std::make_error_code(errc::illegal_byte_sequence
);
// term is used as a FILE* here; str is written byte-wise with fwrite.
350 auto n
= std::fwrite(str
.data(), 1, str
.size(), (FILE*)term
);
// NOTE(review): the condition guarding this io_error assignment is not
// visible in this excerpt.
352 ec
= std::make_error_code(errc::io_error
);
// NOTE(review): by position this appears to be the branch for platforms with
// neither implementation above; the preprocessor line is not visible.
354 ec
= std::make_error_code(std::errc::function_not_supported
);
358 _GLIBCXX_END_NAMESPACE_VERSION