libstdc++-v3/testsuite/22_locale/codecvt/unicode/char.cc

   1 // 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>
   2
   3 // Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation
   4 //
   5 // This file is part of the GNU ISO C++ Library.  This library is free
   6 // software; you can redistribute it and/or modify it under the
   7 // terms of the GNU General Public License as published by the
   8 // Free Software Foundation; either version 2, or (at your option)
   9 // any later version.
  10
  11 // This library is distributed in the hope that it will be useful,
  12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 // GNU General Public License for more details.
  15
  16 // You should have received a copy of the GNU General Public License along
  17 // with this library; see the file COPYING.  If not, write to the Free
  18 // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  19 // USA.
  20
  21 // 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
#include <cstring>
#include <locale>
#include <testsuite_hooks.h>
  24
  25 #ifdef _GLIBCXX_USE___ENC_TRAITS
  26
  27 // Need some char_traits specializations for this to work.
  28 typedef unsigned short                  unicode_t;
  29
  30 namespace std
  31 {
  32   template<>
  33     struct char_traits<unicode_t>
  34     {
  35       typedef unicode_t         char_type;
  36       // Unsigned as wint_t is unsigned.
  37       typedef unsigned long     int_type;
  38       typedef streampos         pos_type;
  39       typedef streamoff         off_type;
  40       typedef mbstate_t         state_type;
  41
  42       static void
  43       assign(char_type& __c1, const char_type& __c2);
  44
  45       static bool
  46       eq(const char_type& __c1, const char_type& __c2);
  47
  48       static bool
  49       lt(const char_type& __c1, const char_type& __c2);
  50
  51       static int
  52       compare(const char_type* __s1, const char_type* __s2, size_t __n)
  53       { return memcmp(__s1, __s2, __n); }
  54
  55       static size_t
  56       length(const char_type* __s);
  57
  58       static const char_type*
  59       find(const char_type* __s, size_t __n, const char_type& __a);
  60
  61       static char_type*
  62       move(char_type* __s1, const char_type* __s2, size_t __n);
  63
  64       static char_type*
  65       copy(char_type* __s1, const char_type* __s2, size_t __n)
  66       {  return static_cast<char_type*>(memcpy(__s1, __s2, __n)); }
  67
  68       static char_type*
  69       assign(char_type* __s, size_t __n, char_type __a);
  70
  71       static char_type
  72       to_char_type(const int_type& __c);
  73
  74       static int_type
  75       to_int_type(const char_type& __c);
  76
  77       static bool
  78       eq_int_type(const int_type& __c1, const int_type& __c2);
  79
  80       static int_type
  81       eof();
  82
  83       static int_type
  84       not_eof(const int_type& __c);
  85     };
  86 }
  87
  88 /*
  89 > how do I check that these conversions are correct?
  90 Very easy.  Since all the characters are from ASCII you simply
  91 zero-extend the values.
  92
  93 drepper$ echo 'black pearl jasmine tea' | od -t x1
  94 0000000 62 6c 61 63 6b 20 70 65 61 72 6c 20 6a 61 73 6d
  95 0000020 69 6e 65 20 74 65 61 0a
  96
  97 So the UCS-2 string is
  98
  99 0x0062, 0x006c, 0x0061, ...
 100
 101 You get the idea.  With iconv() you have to take care of the
 102 byte-order, though.  UCS-2 can mean little- or big endian.  Looking at
 103 your result
 104
 105 > $9 = 25856
 106
 107 it shows that the other byte-order is used (25856 == 0x6500).
 108 */
 109
 110
 111 void
 112 initialize_state(std::__enc_traits& state)
 113 { state._M_init(); }
 114
 115 // Partial specialization using __enc_traits.
 116 // codecvt<unicode_t, char, __enc_traits>
 117 // UNICODE - UCS2 (big endian)
 118 void test01()
 119 {
 120   using namespace std;
 121   typedef codecvt_base::result                  result;
 122   typedef unicode_t                             int_type;
 123   typedef char                                  ext_type;
 124   typedef __enc_traits                          enc_type;
 125   typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
 126   typedef char_traits<int_type>                 int_traits;
 127   typedef char_traits<ext_type>                 ext_traits;
 128
 129   bool                  test = true;
 130   const ext_type*       e_lit = "black pearl jasmine tea";
 131   int                   size = strlen(e_lit);
 132
 133   char  i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
 134   {
 135     0x00, 0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20,
 136     0x00, 0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20,
 137     0x00, 0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e,
 138     0x00, 0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0
 139   };
 140   const int_type*       i_lit = reinterpret_cast<int_type*>(i_lit_base);
 141
 142   const ext_type*       efrom_next;
 143   const int_type*       ifrom_next;
 144   ext_type*             e_arr = new ext_type[size + 1];
 145   ext_type*             eto_next;
 146   int_type*             i_arr = new int_type[size + 1];
 147   int_type*             ito_next;
 148
 149   // construct a locale object with the specialized facet.
 150   locale                loc(locale::classic(), new unicode_codecvt);
 151   // sanity check the constructed locale has the specialized facet.
 152   VERIFY( has_facet<unicode_codecvt>(loc) );
 153   const unicode_codecvt&        cvt = use_facet<unicode_codecvt>(loc);
 154
 155   // in
 156   //  unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
 157   unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0, 0);
 158   initialize_state(state01);
 159   // internal encoding is bigger because of bom
 160   result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
 161                      i_arr, i_arr + size + 1, ito_next);
 162   VERIFY( r1 == codecvt_base::ok );
 163   VERIFY( !int_traits::compare(i_arr, i_lit, size) );
 164   VERIFY( efrom_next == e_lit + size );
 165   VERIFY( ito_next == i_arr + size );
 166
 167   // out
 168   unicode_codecvt::state_type state02("UCS-2BE", "ISO-8859-15", 0, 0);
 169   initialize_state(state02);
 170   result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
 171                        e_arr, e_arr + size, eto_next);
 172   VERIFY( r2 == codecvt_base::ok );
 173   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 174   VERIFY( ifrom_next == i_lit + size );
 175   VERIFY( eto_next == e_arr + size );
 176
 177   // unshift
 178   ext_traits::copy(e_arr, e_lit, size);
 179   unicode_codecvt::state_type state03("UCS-2BE", "ISO-8859-15", 0, 0);
 180   initialize_state(state03);
 181   result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
 182   VERIFY( r3 == codecvt_base::noconv );
 183   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 184   VERIFY( eto_next == e_arr );
 185
 186   int i = cvt.encoding();
 187   VERIFY( i == 2 ); // Target-dependent.
 188
 189   VERIFY( !cvt.always_noconv() );
 190
 191   unicode_codecvt::state_type state04("UCS-2BE", "ISO-8859-15", 0, 0);
 192   initialize_state(state04);
 193   int j = cvt.length(state03, e_lit, e_lit + size, 5);
 194   VERIFY( j == 5 );
 195
 196   int k = cvt.max_length();
 197   VERIFY( k == 1 );
 198
 199   delete [] e_arr;
 200   delete [] i_arr;
 201 }
 202
 203 // Partial specialization using __enc_traits.
 204 // codecvt<unicode_t, char, __enc_traits>
 205 // UNICODE - UCS2 (little endian)
 206 void test02()
 207 {
 208   using namespace std;
 209   typedef codecvt_base::result                  result;
 210   typedef unsigned short                        unicode_t;
 211   typedef unicode_t                             int_type;
 212   typedef char                                  ext_type;
 213   typedef __enc_traits                          enc_type;
 214   typedef codecvt<int_type, ext_type, enc_type> unicode_codecvt;
 215   typedef char_traits<int_type>                 int_traits;
 216   typedef char_traits<ext_type>                 ext_traits;
 217
 218   bool                  test = true;
 219   const ext_type*       e_lit = "black pearl jasmine tea";
 220   int                   size = strlen(e_lit);
 221
 222   char  i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
 223   {
 224     0x62, 0x00, 0x6c, 0x00, 0x61, 0x00, 0x63, 0x00, 0x6b, 0x00, 0x20, 0x00,
 225     0x70, 0x00, 0x65, 0x00, 0x61, 0x00, 0x72, 0x00, 0x6c, 0x00, 0x20, 0x00,
 226     0x6a, 0x00, 0x61, 0x00, 0x73, 0x00, 0x6d, 0x00, 0x69, 0x00, 0x6e, 0x00,
 227     0x65, 0x00, 0x20, 0x00, 0x74, 0x00, 0x65, 0x00, 0x61, 0x00, 0xa0, 0x00
 228   };
 229   const int_type*       i_lit = reinterpret_cast<int_type*>(i_lit_base);
 230
 231   const ext_type*       efrom_next;
 232   const int_type*       ifrom_next;
 233   ext_type*             e_arr = new ext_type[size + 1];
 234   ext_type*             eto_next;
 235   int_type*             i_arr = new int_type[size + 1];
 236   int_type*             ito_next;
 237
 238   // construct a locale object with the specialized facet.
 239   locale                loc(locale::classic(), new unicode_codecvt);
 240   // sanity check the constructed locale has the specialized facet.
 241   VERIFY( has_facet<unicode_codecvt>(loc) );
 242   const unicode_codecvt&        cvt = use_facet<unicode_codecvt>(loc);
 243
 244   // in
 245   unicode_codecvt::state_type state01("UCS-2LE", "ISO-8859-15", 0, 0);
 246   initialize_state(state01);
 247   // internal encoding is bigger because of bom
 248   result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
 249                      i_arr, i_arr + size + 1, ito_next);
 250   VERIFY( r1 == codecvt_base::ok );
 251   VERIFY( !int_traits::compare(i_arr, i_lit, size) );
 252   VERIFY( efrom_next == e_lit + size );
 253   VERIFY( ito_next == i_arr + size );
 254
 255   // out
 256   unicode_codecvt::state_type state02("UCS-2LE", "ISO-8859-15", 0, 0);
 257   initialize_state(state02);
 258   result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
 259                        e_arr, e_arr + size, eto_next);
 260   VERIFY( r2 == codecvt_base::ok );
 261   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 262   VERIFY( ifrom_next == i_lit + size );
 263   VERIFY( eto_next == e_arr + size );
 264
 265   // unshift
 266   ext_traits::copy(e_arr, e_lit, size);
 267   unicode_codecvt::state_type state03("UCS-2LE", "ISO-8859-15", 0, 0);
 268   initialize_state(state03);
 269   result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
 270   VERIFY( r3 == codecvt_base::noconv );
 271   VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
 272   VERIFY( eto_next == e_arr );
 273
 274   int i = cvt.encoding();
 275   VERIFY( i == 2 ); // Target-dependent.
 276
 277   VERIFY( !cvt.always_noconv() );
 278
 279   unicode_codecvt::state_type state04("UCS-2LE", "ISO-8859-15", 0, 0);
 280   initialize_state(state04);
 281   int j = cvt.length(state03, e_lit, e_lit + size, 5);
 282   VERIFY( j == 5 );
 283
 284   int k = cvt.max_length();
 285   VERIFY( k == 1 );
 286
 287   delete [] e_arr;
 288   delete [] i_arr;
 289 }
 290
 291 #endif // _GLIBCXX_USE___ENC_TRAITS
 292
 293 int main ()
 294 {
 295 #if _GLIBCXX_USE___ENC_TRAITS
 296   test01();
 297   test02();
 298 #endif
 299   return 0;
 300 }