Improve vacpp support.
[boost.git] / boost / libs / iostreams / test / detail / utf8_codecvt_facet.hpp
blob7aa5e75e094eb6234b0d84ef7a0c7193190c4d6b
1 #ifndef BOOST_UTF8_CODECVT_FACET_HPP
2 #define BOOST_UTF8_CODECVT_FACET_HPP
4 #include <boost/iostreams/detail/config/wide_streams.hpp>
5 #ifdef BOOST_IOSTREAMS_NO_WIDE_STREAMS
6 # error wide streams not supported on this platform
7 #endif
9 // MS compatible compilers support #pragma once
10 #if defined(_MSC_VER) && (_MSC_VER >= 1020)
11 # pragma once
12 #endif
14 /////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
15 // utf8_codecvt_facet.hpp
17 // Copyright © 2001 Ronald Garcia, Indiana University (garcia@osl.iu.edu)
18 // Andrew Lumsdaine, Indiana University (lums@osl.iu.edu).
19 // Distributed under the Boost Software License, Version 1.0. (See accompany-
20 // ing file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
22 // Note:(Robert Ramey). I have made the following alterations in the original
23 // code.
24 // a) Rendered utf8_codecvt<wchar_t, char> with using templates
25 // b) Move longer functions outside class definition to prevent inlining
26 // and make code smaller
27 // c) added on a derived class to permit translation to/from current
28 // locale to utf8
30 // See http://www.boost.org for updates, documentation, and revision history.
32 // archives stored as text - note these are templated on the basic
33 // stream templates to accommodate wide (and other?) kind of characters
35 // note the fact that on libraries without wide characters, ostream
36 // is not a specialization of basic_ostream which in fact is not defined
37 // in such cases. So we can't use basic_ostream<OStream::char_type> but rather
38 // use two template parameters
40 // utf8_codecvt_facet
41 // This is an implementation of a std::codecvt facet for translating
42 // from UTF-8 externally to UCS-4. Note that this is not tied to
43 // any specific types in order to allow customization on platforms
44 // where wchar_t is not big enough.
46 // NOTES: The current implementation jumps through some unpleasant hoops in
47 // order to deal with signed character types. As a result,
48 // it is necessary for the ExternType to be convertible to unsigned char.
49 // I chose not to tie the extern_type explicitly to char. But if any combination
50 // of types other than <wchar_t,char> is used, then std::codecvt must be
51 // specialized on those types for this to work.
53 #include <locale>
54 #include <cstddef> // size_t
55 #include <boost/integer_traits.hpp>
56 #include <boost/iostreams/detail/config/wide_streams.hpp>
57 #include <boost/iostreams/detail/codecvt_helper.hpp>
59 // maximum length of a multibyte string
60 #define MB_LENGTH_MAX 8
62 struct utf8_codecvt_facet_wchar_t
63 : public boost::iostreams::detail::codecvt_helper<wchar_t, char, std::mbstate_t>
65 public:
66 explicit utf8_codecvt_facet_wchar_t(std::size_t no_locale_manage = 0)
67 : boost::iostreams::detail::codecvt_helper<wchar_t, char, std::mbstate_t>
68 (no_locale_manage)
69 { }
70 protected:
71 virtual std::codecvt_base::result do_in(
72 std::mbstate_t& state,
73 const char * from,
74 const char * from_end,
75 const char * & from_next,
76 wchar_t * to,
77 wchar_t * to_end,
78 wchar_t*& to_next
79 ) const;
81 virtual std::codecvt_base::result do_out(
82 std::mbstate_t & state, const wchar_t * from,
83 const wchar_t * from_end, const wchar_t* & from_next,
84 char * to, char * to_end, char * & to_next
85 ) const;
87 bool invalid_continuing_octet(unsigned char octet_1) const {
88 return (octet_1 < 0x80|| 0xbf< octet_1);
91 bool invalid_leading_octet(unsigned char octet_1) const {
92 return (0x7f < octet_1 && octet_1 < 0xc0) ||
93 (octet_1 > 0xfd);
96 // continuing octets = octets except for the leading octet
97 static unsigned int get_cont_octet_count(unsigned char lead_octet) {
98 return get_octet_count(lead_octet) - 1;
101 static unsigned int get_octet_count(unsigned char lead_octet);
103 // How many "continuing octets" will be needed for this word
104 // == total octets - 1.
105 int get_cont_octet_out_count(wchar_t word) const ;
107 virtual bool do_always_noconv() const throw() { return false; }
109 // UTF-8 isn't really stateful since we rewind on partial conversions
110 virtual std::codecvt_base::result do_unshift(
111 std::mbstate_t&,
112 char * from,
113 char * to,
114 char * & next
115 ) const{
116 next = from;
117 return ok;
120 virtual int do_encoding() const throw() {
121 const int variable_byte_external_encoding=0;
122 return variable_byte_external_encoding;
125 // How many char objects can I process to get <= max_limit
126 // wchar_t objects?
127 virtual int do_length(
128 BOOST_IOSTREAMS_CODECVT_CV_QUALIFIER std::mbstate_t &,
129 const char * from,
130 const char * from_end,
131 std::size_t max_limit
132 ) const throw();
134 // Largest possible value do_length(state,from,from_end,1) could return.
135 virtual int do_max_length() const throw () {
136 return 6; // largest UTF-8 encoding of a UCS-4 character
#if 0 // not used - incorrect in any case
// Robert Ramey - use the above to make a code converter from multi-byte
// char strings to utf8 encoding
// This whole definition is compiled out; it is retained for reference only.
struct utf8_codecvt_facet_char : public utf8_codecvt_facet_wchar_t
{
    typedef utf8_codecvt_facet_wchar_t base_class;
public:
    // no_locale_manage is forwarded unchanged to the base class.
    explicit utf8_codecvt_facet_char(std::size_t no_locale_manage=0)
        : base_class(no_locale_manage)
    {}
protected:
    // char -> char conversion hook (declared only).
    virtual std::codecvt_base::result do_in(
        std::mbstate_t & state,
        const char * from,
        const char * from_end,
        const char * & from_next,
        char * to,
        char * to_end,
        char * & to_next
    ) const;

    // char -> char conversion hook in the opposite direction (declared only).
    virtual std::codecvt_base::result do_out(
        std::mbstate_t & state,
        const char * from,
        const char * from_end,
        const char* & from_next,
        char * to,
        char * to_end,
        char * & to_next
    ) const;

    // How many char objects can I process to get <= max_limit
    // char objects?
    virtual int do_length(
        const std::mbstate_t&,
        const char * from,
        const char * from_end,
        std::size_t max_limit
    ) const;
};
#endif
// Primary template: intentionally empty. Only the explicit specializations
// for particular <Internal, External> pairs provide a usable facet.
template<class Internal, class External>
struct utf8_codecvt_facet
{ };
186 template<>
187 struct utf8_codecvt_facet<wchar_t, char>
188 : public utf8_codecvt_facet_wchar_t
#if 0
// Disabled together with utf8_codecvt_facet_char, which is also compiled out.
template<>
struct utf8_codecvt_facet<char, char>
    : public utf8_codecvt_facet_char
{ };
#endif
198 #endif // BOOST_UTF8_CODECVT_FACET_HPP