2003-07-04 Benjamin Kosnik <bkoz@redhat.com>
[official-gcc.git] / libstdc++-v3 / testsuite / 22_locale / codecvt / unicode / char.cc
blob1fa9648f5e86faae5812c9e49439b757adb999b4
1 // 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>
3 // Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 2, or (at your option)
9 // any later version.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License along
17 // with this library; see the file COPYING. If not, write to the Free
18 // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
19 // USA.
21 // 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
22 #include <locale>
23 #include <testsuite_hooks.h>
25 #ifdef _GLIBCXX_USE___ENC_TRAITS
27 // Need some char_traits specializations for this to work.
// 16-bit character type used as the internal (UCS-2) code unit in the
// tests below.
typedef unsigned short unicode_t;

namespace std
{
  // Specialization of char_traits for unicode_t.
  //
  // Only compare() and copy() are defined here; every other member is
  // declared but not defined in this block, so using one of them requires
  // a definition elsewhere.
  //
  // All length arguments (__n) are counts of char_type elements, not bytes.
  template<>
    struct char_traits<unicode_t>
    {
      typedef unicode_t     char_type;
      // Unsigned as wint_t is unsigned.
      typedef unsigned long int_type;
      typedef streampos     pos_type;
      typedef streamoff     off_type;
      typedef mbstate_t     state_type;

      static void
      assign(char_type& __c1, const char_type& __c2);

      static bool
      eq(const char_type& __c1, const char_type& __c2);

      static bool
      lt(const char_type& __c1, const char_type& __c2);

      // Lexicographically compares the first __n elements of __s1 and __s2.
      // Returns 0 if they are equal, a negative value if the first
      // differing element of __s1 is smaller, a positive value otherwise.
      //
      // The comparison is done element by element: passing __n straight
      // to memcmp would examine only __n bytes (half of the elements) and
      // would order by byte value rather than by element value.
      static int
      compare(const char_type* __s1, const char_type* __s2, size_t __n)
      {
        for (size_t __i = 0; __i < __n; ++__i)
          if (__s1[__i] != __s2[__i])
            return __s1[__i] < __s2[__i] ? -1 : 1;
        return 0;
      }

      static size_t
      length(const char_type* __s);

      static const char_type*
      find(const char_type* __s, size_t __n, const char_type& __a);

      static char_type*
      move(char_type* __s1, const char_type* __s2, size_t __n);

      // Copies __n elements from __s2 to __s1 and returns __s1.
      // memcpy takes a byte count, so the element count is scaled by
      // sizeof(char_type).
      static char_type*
      copy(char_type* __s1, const char_type* __s2, size_t __n)
      {
        return static_cast<char_type*>
          (memcpy(__s1, __s2, __n * sizeof(char_type)));
      }

      static char_type*
      assign(char_type* __s, size_t __n, char_type __a);

      static char_type
      to_char_type(const int_type& __c);

      static int_type
      to_int_type(const char_type& __c);

      static bool
      eq_int_type(const int_type& __c1, const int_type& __c2);

      static int_type
      eof();

      static int_type
      not_eof(const int_type& __c);
    };
}
/*
> how do I check that these conversions are correct?
Very easy.  Since all the characters are from ASCII you simply
zero-extend the values.

drepper$ echo 'black pearl jasmine tea' | od -t x1
0000000 62 6c 61 63 6b 20 70 65 61 72 6c 20 6a 61 73 6d
0000020 69 6e 65 20 74 65 61 0a

So the UCS-2 string is

0x0062, 0x006c, 0x0061, ...

You get the idea.  With iconv() you have to take care of the
byte-order, though.  UCS-2 can mean little- or big endian.  Looking at
your result

> $9 = 25856

it shows that the other byte-order is used (25856 == 0x6500).
*/
111 void
112 initialize_state(std::__enc_traits& state)
113 { state._M_init(); }
115 // Partial specialization using __enc_traits.
116 // codecvt<unicode_t, char, __enc_traits>
117 // UNICODE - UCS2 (big endian)
118 void test01()
120 using namespace std;
121 typedef codecvt_base::result result;
122 typedef unicode_t int_type;
123 typedef char ext_type;
124 typedef __enc_traits enc_type;
125 typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
126 typedef char_traits<int_type> int_traits;
127 typedef char_traits<ext_type> ext_traits;
129 bool test = true;
130 const ext_type* e_lit = "black pearl jasmine tea";
131 int size = strlen(e_lit);
133 char i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
135 0x00, 0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20,
136 0x00, 0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20,
137 0x00, 0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e,
138 0x00, 0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0
140 const int_type* i_lit = reinterpret_cast<int_type*>(i_lit_base);
142 const ext_type* efrom_next;
143 const int_type* ifrom_next;
144 ext_type* e_arr = new ext_type[size + 1];
145 ext_type* eto_next;
146 int_type* i_arr = new int_type[size + 1];
147 int_type* ito_next;
149 // construct a locale object with the specialized facet.
150 locale loc(locale::classic(), new unicode_codecvt);
151 // sanity check the constructed locale has the specialized facet.
152 VERIFY( has_facet<unicode_codecvt>(loc) );
153 const unicode_codecvt& cvt = use_facet<unicode_codecvt>(loc);
155 // in
156 // unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
157 unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0, 0);
158 initialize_state(state01);
159 // internal encoding is bigger because of bom
160 result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
161 i_arr, i_arr + size + 1, ito_next);
162 VERIFY( r1 == codecvt_base::ok );
163 VERIFY( !int_traits::compare(i_arr, i_lit, size) );
164 VERIFY( efrom_next == e_lit + size );
165 VERIFY( ito_next == i_arr + size );
167 // out
168 unicode_codecvt::state_type state02("UCS-2BE", "ISO-8859-15", 0, 0);
169 initialize_state(state02);
170 result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
171 e_arr, e_arr + size, eto_next);
172 VERIFY( r2 == codecvt_base::ok );
173 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
174 VERIFY( ifrom_next == i_lit + size );
175 VERIFY( eto_next == e_arr + size );
177 // unshift
178 ext_traits::copy(e_arr, e_lit, size);
179 unicode_codecvt::state_type state03("UCS-2BE", "ISO-8859-15", 0, 0);
180 initialize_state(state03);
181 result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
182 VERIFY( r3 == codecvt_base::noconv );
183 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
184 VERIFY( eto_next == e_arr );
186 int i = cvt.encoding();
187 VERIFY( i == 2 ); // Target-dependent.
189 VERIFY( !cvt.always_noconv() );
191 unicode_codecvt::state_type state04("UCS-2BE", "ISO-8859-15", 0, 0);
192 initialize_state(state04);
193 int j = cvt.length(state03, e_lit, e_lit + size, 5);
194 VERIFY( j == 5 );
196 int k = cvt.max_length();
197 VERIFY( k == 1 );
199 delete [] e_arr;
200 delete [] i_arr;
203 // Partial specialization using __enc_traits.
204 // codecvt<unicode_t, char, __enc_traits>
205 // UNICODE - UCS2 (little endian)
206 void test02()
208 using namespace std;
209 typedef codecvt_base::result result;
210 typedef unsigned short unicode_t;
211 typedef unicode_t int_type;
212 typedef char ext_type;
213 typedef __enc_traits enc_type;
214 typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
215 typedef char_traits<int_type> int_traits;
216 typedef char_traits<ext_type> ext_traits;
218 bool test = true;
219 const ext_type* e_lit = "black pearl jasmine tea";
220 int size = strlen(e_lit);
222 char i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
224 0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20, 0x00,
225 0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20, 0x00,
226 0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e, 0x00,
227 0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0, 0x00
229 const int_type* i_lit = reinterpret_cast<int_type*>(i_lit_base);
231 const ext_type* efrom_next;
232 const int_type* ifrom_next;
233 ext_type* e_arr = new ext_type[size + 1];
234 ext_type* eto_next;
235 int_type* i_arr = new int_type[size + 1];
236 int_type* ito_next;
238 // construct a locale object with the specialized facet.
239 locale loc(locale::classic(), new unicode_codecvt);
240 // sanity check the constructed locale has the specialized facet.
241 VERIFY( has_facet<unicode_codecvt>(loc) );
242 const unicode_codecvt& cvt = use_facet<unicode_codecvt>(loc);
244 // in
245 unicode_codecvt::state_type state01("UCS-2LE", "ISO-8859-15", 0, 0);
246 initialize_state(state01);
247 // internal encoding is bigger because of bom
248 result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
249 i_arr, i_arr + size + 1, ito_next);
250 VERIFY( r1 == codecvt_base::ok );
251 VERIFY( !int_traits::compare(i_arr, i_lit, size) );
252 VERIFY( efrom_next == e_lit + size );
253 VERIFY( ito_next == i_arr + size );
255 // out
256 unicode_codecvt::state_type state02("UCS-2LE", "ISO-8859-15", 0, 0);
257 initialize_state(state02);
258 result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
259 e_arr, e_arr + size, eto_next);
260 VERIFY( r2 == codecvt_base::ok );
261 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
262 VERIFY( ifrom_next == i_lit + size );
263 VERIFY( eto_next == e_arr + size );
265 // unshift
266 ext_traits::copy(e_arr, e_lit, size);
267 unicode_codecvt::state_type state03("UCS-2LE", "ISO-8859-15", 0, 0);
268 initialize_state(state03);
269 result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
270 VERIFY( r3 == codecvt_base::noconv );
271 VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
272 VERIFY( eto_next == e_arr );
274 int i = cvt.encoding();
275 VERIFY( i == 2 ); // Target-dependent.
277 VERIFY( !cvt.always_noconv() );
279 unicode_codecvt::state_type state04("UCS-2LE", "ISO-8859-15", 0, 0);
280 initialize_state(state04);
281 int j = cvt.length(state03, e_lit, e_lit + size, 5);
282 VERIFY( j == 5 );
284 int k = cvt.max_length();
285 VERIFY( k == 1 );
287 delete [] e_arr;
288 delete [] i_arr;
291 #endif // _GLIBCXX_USE___ENC_TRAITS
// Test driver.  The tests are compiled and run only when the library
// provides __enc_traits; otherwise the program does nothing and returns 0.
int main ()
{
  // Use #ifdef, matching the guard around the test definitions: a plain
  // #if would be ill-formed if the macro were defined with no value.
#ifdef _GLIBCXX_USE___ENC_TRAITS
  test01();
  test02();
#endif
  return 0;
}