gdb/charset.h

   1 /* Character set conversion support for GDB.
   2    Copyright (C) 2001-2024 Free Software Foundation, Inc.
   3
   4    This file is part of GDB.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  18
  19 #ifndef CHARSET_H
  20 #define CHARSET_H
  21
  22 #include "gdbsupport/def-vector.h"
  23
  24 /* If the target program uses a different character set than the host,
  25    GDB has some support for translating between the two; GDB converts
  26    characters and strings to the host character set before displaying
  27    them, and converts characters and strings appearing in expressions
  28    entered by the user to the target character set.
  29
  30    GDB's code pretty much assumes that the host character set is some
  31    superset of ASCII; there are plenty of ('0' + n) expressions and
  32    the like.  */
  33
  34 /* Return the name of the current host, target, or target wide
  35    character set.  The result is owned by the charset module; the
  36    caller should not free it.  */
  37 const char *host_charset (void);
  38 const char *target_charset (struct gdbarch *gdbarch);
  39 const char *target_wide_charset (struct gdbarch *gdbarch);
  40
  41 /* The kinds of transliteration that convert_between_encodings can
  42    perform; one of these is passed as its TRANSLIT argument.  */
  43 enum transliterations
  44   {
  45     /* No transliteration: error on failure to convert.  */
  46     translit_none,
  47     /* Transliterate to host char.  */
  48     translit_char
  49   };
  50
  51 /* Convert between two encodings.
  52
  53    FROM is the name of the source encoding.
  54    TO is the name of the destination encoding.
  55    BYTES holds the bytes to convert; these are assumed to be
  56    characters in the FROM encoding.
  57    NUM_BYTES is the number of bytes in BYTES.
  58    WIDTH is the width of a character from the FROM charset, in bytes.
  59    For a variable width encoding, WIDTH should be the size of a "base
  60    character".
  61    OUTPUT is an obstack where the converted data is written.  The
  62    caller is responsible for initializing the obstack, and for
  63    destroying the obstack should an error occur.
  64    TRANSLIT specifies how invalid conversions should be handled.  */
  65
  66 void convert_between_encodings (const char *from, const char *to,
  67                                 const gdb_byte *bytes,
  68                                 unsigned int num_bytes,
  69                                 int width, struct obstack *output,
  70                                 enum transliterations translit);
  71
  72
  73 /* The kinds of result that wchar_iterator::iterate can report.  */
  74 enum wchar_iterate_result
  75   {
  76     /* Ordinary return.  */
  77     wchar_iterate_ok,
  78     /* Invalid input sequence.  */
  79     wchar_iterate_invalid,
  80     /* Incomplete input sequence at the end of the input.  */
  81     wchar_iterate_incomplete,
  82     /* EOF: the end of the input was reached.  */
  83     wchar_iterate_eof
  84   };
  85
  86 /* An iterator that yields host gdb_wchar_t's from a target string.  */
  87 class wchar_iterator
  88 {
  89  public:
  90
  91   /* Create a new character iterator which returns gdb_wchar_t's.
  92      INPUT is the input buffer.  BYTES is the number of bytes in the
  93      input buffer.  CHARSET is the name of the character set in which
  94      INPUT is encoded.  WIDTH is the number of bytes in a base
  95      character of CHARSET.
  96
  97      This constructor can throw on error.  */
  98   wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
  99                   size_t width);
 100
 101   ~wchar_iterator ();
 102
 103   /* Perform a single iteration of this iterator.
 104
 105      Returns the number of characters converted.  A negative result
 106      means that EOF has been reached.  A positive result indicates the
 107      number of valid wchar_ts in the result; *OUT_CHARS is updated to
 108      point to the first valid character.
 109
 110      In all cases aside from EOF, *PTR is set to point to the first
 111      converted target byte and *LEN is set to the number of bytes
 112      converted.
 113
 114      A zero result means one of several unusual results.  *OUT_RESULT is
 115      set to indicate the type of unusual return.
 116
 117      wchar_iterate_invalid means that an invalid input character was
 118      seen.  The iterator is advanced by WIDTH (the argument to
 119      the wchar_iterator constructor) bytes.
 120
 121      wchar_iterate_incomplete means that an incomplete character was
 122      seen at the end of the input sequence.
 123
 124      wchar_iterate_eof means that all bytes were successfully
 125      converted.  The other output arguments are not set.  */
 126   int iterate (enum wchar_iterate_result *out_result, gdb_wchar_t **out_chars,
 127                const gdb_byte **ptr, size_t *len);
 128
 129  private:
 130
 131   /* The underlying iconv descriptor; an int when PHONY_ICONV is defined.  */
 132 #ifdef PHONY_ICONV
 133   int m_desc;
 134 #else
 135   iconv_t m_desc;
 136 #endif
 137
 138   /* The input string.  This is updated as we convert characters.  */
 139   const gdb_byte *m_input;
 140   /* The number of bytes remaining in the input.  */
 141   size_t m_bytes;
 142
 143   /* The width of an input character, in bytes.  */
 144   size_t m_width;
 145
 146   /* The output buffer of gdb_wchar_t's.  */
 147   gdb::def_vector<gdb_wchar_t> m_out;
 148 };
 149
 150 \f
 151
 152 /* GDB needs to know a few details of its execution character set.
 153    This knowledge is isolated here and in charset.c.  */
 154
 155 /* The host's escape character (ASCII ESC, decimal 27).  */
 156 #define HOST_ESCAPE_CHAR 27
 157
 158 /* Return the host control character corresponding to the letter C
 159    (for example, 'c' yields Control-C).  */
 160 char host_letter_to_control_character (char c);
 161 /* The UTF-32 encoding name selected by WORDS_BIGENDIAN.  */
 162 #if WORDS_BIGENDIAN
 163 #define HOST_UTF32 "UTF-32BE"
 164 #else /* !WORDS_BIGENDIAN */
 165 #define HOST_UTF32 "UTF-32LE"
 166 #endif /* WORDS_BIGENDIAN */
 167
 168 #endif /* CHARSET_H */