1 // <text_encoding> -*- C++ -*-
3 // Copyright The GNU Toolchain Authors.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /** @file include/text_encoding
26 * This is a Standard C++ Library header.
29 #ifndef _GLIBCXX_TEXT_ENCODING
30 #define _GLIBCXX_TEXT_ENCODING
32 #pragma GCC system_header
34 #include <bits/requires_hosted.h>
36 #define __glibcxx_want_text_encoding
37 #include <bits/version.h>
39 #ifdef __cpp_lib_text_encoding
41 #include <string_view>
42 #include <bits/functional_hash.h> // hash
43 #include <bits/ranges_util.h> // view_interface
44 #include <bits/unicode.h> // __charset_alias_match
45 #include <ext/numeric_traits.h> // __int_traits
47 namespace std _GLIBCXX_VISIBILITY(default)
49 _GLIBCXX_BEGIN_NAMESPACE_VERSION
52 * @brief An interface for accessing the IANA Character Sets registry.
// Integer type used to store an encoding's numeric identifier (_M_id).
61 using id = __INT_LEAST32_TYPE__;
// Orders a table entry against a bare identifier value by comparing _M_id.
// This (element, value) form is what std::lower_bound needs in _S_find_id.
66 operator<(const _Rep& __r, id __m) noexcept
67 { return __r._M_id < __m; }
// An entry matches a name when its stored _M_name compares equal to __name
// with operator== (no alias-style loose matching is done here).
70 operator==(const _Rep& __r, string_view __name) noexcept
71 { return __r._M_name == __name; }
// Maximum number of characters copied into _M_name by the constructors;
// the _M_name buffer is declared with max_name_length + 1 elements.
75 static constexpr size_t max_name_length = 63;
// Scoped enumeration of encoding identifiers, using _Rep::id as its
// underlying type so that values can be compared with _Rep::_M_id.
// NOTE(review): the numeric values presumably mirror the MIBenum numbers
// of the IANA Character Sets registry -- confirm against the registry.
77 enum class id : _Rep::id
93 HalfWidthKatakana = 15,
96 EUCPkdFmtJapanese = 18,
97 EUCFixWidJapanese = 19,
98 ISO4UnitedKingdom = 20,
99 ISO11SwedishForNames = 21,
103 ISO60DanishNorwegian = 25,
106 ISO646basic1983 = 28,
108 ISO2IntlRefVersion = 30,
117 ISO13JISC6220jp = 41,
118 ISO14JISC6220ro = 42,
119 ISO16Portuguese = 43,
121 ISO19LatinGreek = 45,
123 ISO27LatinGreek1 = 47,
124 ISO5427Cyrillic = 48,
125 ISO42JISC62261978 = 49,
126 ISO47BSViewdata = 50,
129 ISO51INISCyrillic = 53,
134 ISO61Norwegian2 = 58,
135 ISO70VideotexSupp1 = 59,
136 ISO84Portuguese2 = 60,
143 ISO91JISC62291984a = 67,
144 ISO92JISC62991984b = 68,
145 ISO93JIS62291984badd = 69,
146 ISO94JIS62291984hand = 70,
147 ISO95JIS62291984handadd = 71,
148 ISO96JISC62291984kana = 72,
153 ISO111ECMACyrillic = 77,
154 ISO121Canadian1 = 78,
155 ISO122Canadian2 = 79,
156 ISO123CSAZ24341985gr = 80,
162 ISO139CSN369103 = 86,
163 ISO141JUSIB1002 = 87,
166 ISO147Macedonian = 90,
170 ISO153GOST1976874 = 94,
174 ISO159JISX02121990 = 98,
189 OSDEBCDICDF0415 = 115,
190 OSDEBCDICDF03IRV = 116,
191 OSDEBCDICDF041 = 117,
197 UnicodeLatin1 = 1003,
198 UnicodeJapanese = 1004,
199 UnicodeIBM1261 = 1005,
200 UnicodeIBM1268 = 1006,
201 UnicodeIBM1276 = 1007,
202 UnicodeIBM1264 = 1008,
203 UnicodeIBM1265 = 1009,
216 Windows30Latin1 = 2000,
217 Windows31Latin1 = 2001,
218 Windows31Latin2 = 2002,
219 Windows31Latin5 = 2003,
221 AdobeStandardEncoding = 2005,
223 VenturaInternational = 2007,
225 PC850Multilingual = 2009,
226 PC8DanishNorwegian = 2012,
227 PC862LatinHebrew = 2013,
237 MicrosoftPublishing = 2023,
258 PC8CodePage437 = 2011,
280 IBMEBCDICATDE = 2064,
// Default construction relies on the default member initializers:
// _M_rep points at _S_reps + 1 (id::unknown) and _M_name is zero-filled.
340 constexpr text_encoding() = default;
// Construct from an encoding name.  The table entry is looked up with
// _S_find_name, and at most max_name_length characters of the name as
// given by the caller are copied into _M_name.
343 text_encoding(string_view __enc) noexcept
344 : _M_rep(_S_find_name(__enc))
346 __enc.copy(_M_name, max_name_length);
349 // @pre i has the value of one of the enumerators of id.
// Construct from an identifier.  The table entry is looked up with
// _S_find_id; if that entry has a non-empty name it is copied into
// _M_name, otherwise _M_name keeps its zero-filled default.
351 text_encoding(id __i) noexcept
352 : _M_rep(_S_find_id(__i))
354 if (string_view __name(_M_rep->_M_name); !__name.empty())
355 __name.copy(_M_name, max_name_length);
// Returns the identifier stored in the table entry this object refers to.
358 constexpr id mib() const noexcept { return id(_M_rep->_M_id); }
// Returns a pointer to this object's own name buffer (_M_name), so the
// pointer is only valid while this text_encoding object is alive.
360 constexpr const char* name() const noexcept { return _M_name; }
// A view (via ranges::view_interface) whose iteration starts at a given
// _Rep table entry.  It stores a single pointer, _M_begin.
362 struct aliases_view : ranges::view_interface<aliases_view>
// Empty tag type used as the end-of-range marker.
366 struct _Sentinel { };
// begin() is defined out of line, after _Iterator is complete.
369 constexpr _Iterator begin() const noexcept;
// end() always returns a value-initialized sentinel.
370 constexpr _Sentinel end() const noexcept { return {}; }
// text_encoding needs access to the constructor below to create views.
373 friend struct text_encoding;
// Creates a view whose begin() starts at __r; __r may be null.
375 constexpr explicit aliases_view(const _Rep* __r) : _M_begin(__r) { }
// First table entry of the view, or nullptr.
377 const _Rep* _M_begin = nullptr;
// Returns a view starting at this encoding's table entry when that entry
// has a non-empty name; otherwise returns a view holding a null pointer.
380 constexpr aliases_view
381 aliases() const noexcept
383 return _M_rep->_M_name[0] ? aliases_view(_M_rep) : aliases_view{nullptr};
// Equality of two encodings.  When both have mib() == id::other the
// identifiers carry no information, so the stored names are compared with
// _S_comp instead; in every other case only the identifiers are compared.
386 friend constexpr bool
387 operator==(const text_encoding& __a,
388 const text_encoding& __b) noexcept
390 if (__a.mib() == id::other && __b.mib() == id::other) [[unlikely]]
391 return _S_comp(__a._M_name, __b._M_name);
393 return __a.mib() == __b.mib();
// Equality with a bare identifier: compares mib() only, never the name.
396 friend constexpr bool
397 operator==(const text_encoding& __encoding, id __i) noexcept
398 { return __encoding.mib() == __i; }
// The functions in this group are only provided when char has 8 bits.
400 #if __CHAR_BIT__ == 8
// Compile-time only (consteval) query for the literal encoding.
401 static consteval text_encoding
// Prefer the compiler-provided charset name macro, then Clang's literal
// encoding macro; with neither available a default-constructed object is
// returned.
404 #ifdef __GNUC_EXECUTION_CHARSET_NAME
405 return text_encoding(__GNUC_EXECUTION_CHARSET_NAME);
406 #elif defined __clang_literal_encoding__
407 return text_encoding(__clang_literal_encoding__);
409 return text_encoding();
// Builds a text_encoding from the id _Id and asks it _M_is_environment().
419 { return text_encoding(_Id)._M_is_environment(); }
// Deleted declarations of the same three functions.
// NOTE(review): presumably these belong to the alternative branch of the
// __CHAR_BIT__ check above -- confirm against the full header.
421 static text_encoding literal() = delete;
422 static text_encoding environment() = delete;
423 template<id> static bool environment_is() = delete;
// Table entry this object refers to; defaults to the second element.
427 const _Rep* _M_rep = _S_reps + 1; // id::unknown
// Zero-initialized name buffer with room for max_name_length characters
// plus one extra element.
428 char _M_name[max_name_length + 1] = {0};
// Declared here; the definition is not part of this header's inline code.
431 _M_is_environment() const;
// Table of { identifier, name } entries.  It starts with two entries that
// have empty names (ids 1 and 2), continues with the data pulled in from
// <bits/text_encoding-data.h>, and ends with a sentinel entry.
433 static inline constexpr _Rep _S_reps[] = {
434 { 1, "" }, { 2, "" },
// The data header is requested by defining this macro before including it.
435 #define _GLIBCXX_GET_ENCODING_DATA
436 #include <bits/text_encoding-data.h>
// If the macro is still defined after the include, the data is rejected.
437 #ifdef _GLIBCXX_GET_ENCODING_DATA
438 # error "Invalid text_encoding data"
440 { 9999, nullptr }, // sentinel
// Name comparison used for lookups and for comparing id::other encodings;
// it simply forwards to __unicode::__charset_alias_match.
443 static constexpr bool
444 _S_comp(string_view __a, string_view __b)
445 { return __unicode::__charset_alias_match(__a, __b); }
// Finds the table entry for an encoding name.
// Returns a pointer into _S_reps, or _S_reps itself (id::other) when no
// entry's name matches.
447 static constexpr const _Rep*
448 _S_find_name(string_view __name) noexcept
450 #ifdef _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET
451 // Optimize the common UTF-8 case to avoid a linear search through all
452 // strings in the table using the _S_comp function.
// Note that this shortcut only triggers for the exact spelling "UTF-8".
453 if (__name == "UTF-8")
454 return _S_reps + 2 + _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET;
457 // The first two array elements (other and unknown) don't have names.
458 // The last element is a sentinel that can never match anything.
459 const auto __first = _S_reps + 2, __end = std::end(_S_reps) - 1;
// Linear scan over every named entry, comparing with _S_comp.
460 for (auto __r = __first; __r != __end; ++__r)
461 if (_S_comp(__r->_M_name, __name))
463 // Might have matched an alias. Find the first entry for this ID.
464 const auto __id = __r->_M_id;
// Look backwards while the preceding entry carries the same identifier.
465 while (__r[-1]._M_id == __id)
469 return _S_reps; // id::other
// Finds the table entry for an identifier.
// Falls back to _S_reps + 1 (id::unknown) when the identifier is not in
// the table, after asserting the precondition.
472 static constexpr const _Rep*
473 _S_find_id(id __id) noexcept
// Convert the scoped enumerator to the integer type stored in the table.
475 const auto __i = (_Rep::id)__id;
// Binary search over all entries except the trailing sentinel; this uses
// the operator<(const _Rep&, id) overload, so it relies on the entries
// being ordered by _M_id.
476 const auto __r = std::lower_bound(_S_reps, std::end(_S_reps) - 1, __i);
477 if (__r->_M_id == __i) [[likely]]
481 // Preconditions: i has the value of one of the enumerators of id.
482 __glibcxx_assert(__r->_M_id == __i);
483 return _S_reps + 1; // id::unknown
// Hash support for text_encoding.  The hash is computed from mib() alone,
// so the stored name does not contribute to the result.
489 struct hash<text_encoding>
492 operator()(const text_encoding& __enc) const noexcept
493 { return std::hash<text_encoding::id>()(__enc.mib()); }
// Iterator type of aliases_view.  It walks _Rep table entries and is
// dereferenceable only while the current entry's _M_id equals the _M_id
// the iterator was created with.
496 class text_encoding::aliases_view::_Iterator
// Elements are C strings returned by value, so reference is not an
// lvalue reference type.
499 using value_type = const char*;
500 using reference = const char*;
501 using difference_type = int;
// A default-constructed iterator has a null _M_rep and is therefore not
// dereferenceable.
503 constexpr _Iterator() = default;
// Dereference: yields the current entry's name when dereferenceable.
508 if (_M_dereferenceable()) [[likely]]
509 return _M_rep->_M_name;
// Otherwise the precondition is asserted.
512 __glibcxx_assert(_M_dereferenceable());
// Increment: only acts when dereferenceable; otherwise the precondition
// is asserted.
520 if (_M_dereferenceable()) [[likely]]
524 __glibcxx_assert(_M_dereferenceable());
// Decrement: allowed only when the iterator is non-null and the preceding
// table entry carries the same identifier.
533 const bool __decrementable
534 = _M_rep != nullptr && _M_rep[-1]._M_id == _M_id;
535 if (__decrementable) [[likely]]
539 __glibcxx_assert(__decrementable);
// Subscript: equivalent to dereferencing a copy advanced by __n.
562 operator[](difference_type __n) const
563 { return *(*this + __n); }
// Move the iterator by __n entries (forwards for positive __n, backwards
// for negative __n).  The range checks below require that the target stays
// within _S_reps and that the entry checked still carries this iterator's
// _M_id.  When a check fails the iterator is reset to a value-initialized
// _Iterator, which has a null _M_rep, so the assertion at the end can
// report the out-of-range move.
566 operator+=(difference_type __n)
// A null iterator is never moved.
568 if (_M_rep != nullptr)
// Forward move: must stay below the end of the table, and the entry just
// before the target must belong to the same identifier.
572 if (__n < (std::end(_S_reps) - _M_rep)
573 && _M_rep[__n - 1]._M_id == _M_id) [[likely]]
// Out of range: reset *this.  This has to be an assignment; the earlier
// `*this == _Iterator{}` only compared and discarded the result, leaving
// the iterator unchanged.
576 *this = _Iterator{};
// Backward move: must stay above the start of the table, and the target
// entry itself must belong to the same identifier.
580 if (__n > (_S_reps - _M_rep)
581 && _M_rep[__n]._M_id == _M_id) [[likely]]
// Out of range: reset *this (assignment, not comparison).
584 *this = _Iterator{};
// Precondition check; a reset iterator has a null _M_rep.
588 __glibcxx_assert(_M_rep != nullptr);
// Move the iterator by -__n entries, implemented in terms of operator+=.
593 operator-=(difference_type __n)
595 using _Traits = __gnu_cxx::__int_traits<difference_type>;
// The minimum value of difference_type is handled separately, before the
// negation below, by forwarding the maximum value instead.
596 if (__n == _Traits::__min) [[unlikely]]
597 return operator+=(_Traits::__max);
598 return operator+=(-__n);
// Distance between two iterators.  When both carry the same _M_id the
// result is the difference of their table pointers; otherwise the
// precondition is asserted and the maximum difference_type is returned.
601 constexpr difference_type
602 operator-(const _Iterator& __i) const
604 if (_M_id == __i._M_id)
605 return _M_rep - __i._M_rep;
606 __glibcxx_assert(_M_id == __i._M_id);
607 return __gnu_cxx::__int_traits<difference_type>::__max;
// Iterator equality is the defaulted member-wise comparison.
611 operator==(const _Iterator&) const = default;
// An iterator equals the sentinel once it is no longer dereferenceable.
614 operator==(_Sentinel) const noexcept
615 { return !_M_dereferenceable(); }
// Ordering compares the table pointers; both iterators are asserted to
// carry the same _M_id.
617 constexpr strong_ordering
618 operator<=>(const _Iterator& __i) const
620 __glibcxx_assert(_M_id == __i._M_id);
621 return _M_rep <=> __i._M_rep;
// Non-member arithmetic, taking the iterator by value: iterator + n.
624 friend constexpr _Iterator
625 operator+(_Iterator __i, difference_type __n)
// n + iterator.
631 friend constexpr _Iterator
632 operator+(difference_type __n, _Iterator __i)
// iterator - n.
638 friend constexpr _Iterator
639 operator-(_Iterator __i, difference_type __n)
// text_encoding needs access to the constructor below.
646 friend class text_encoding;
// Starts iteration at __r and records that entry's identifier; a null
// __r records the identifier 0.
649 _Iterator(const _Rep* __r) noexcept
650 : _M_rep(__r), _M_id(__r ? __r->_M_id : 0)
// True while the iterator is non-null and the entry it points at still
// carries the identifier recorded at construction.
654 _M_dereferenceable() const noexcept
655 { return _M_rep != nullptr && _M_rep->_M_id == _M_id; }
// Current table entry, or nullptr.
657 const _Rep* _M_rep = nullptr;
// Out-of-line definition of aliases_view::begin(): wraps the stored
// _M_begin pointer (which may be null) in an _Iterator.
662 text_encoding::aliases_view::begin() const noexcept
664 { return _Iterator(_M_begin); }
668 // Opt-in to borrowed_range concept
// Specialization of the enable_borrowed_range variable template for
// aliases_view, set to true.
670 inline constexpr bool
671 enable_borrowed_range<std::text_encoding::aliases_view> = true;
674 _GLIBCXX_END_NAMESPACE_VERSION
677 #endif // __cpp_lib_text_encoding
678 #endif // _GLIBCXX_TEXT_ENCODING