src/coding.h

   1 /* Header for coding system handler.
   2    Copyright (C) 2001-2016 Free Software Foundation, Inc.
   3    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4      2005, 2006, 2007, 2008, 2009, 2010, 2011
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H14PRO021
   7    Copyright (C) 2003
   8      National Institute of Advanced Industrial Science and Technology (AIST)
   9      Registration Number H13PRO009
  10
  11 This file is part of GNU Emacs.
  12
  13 GNU Emacs is free software: you can redistribute it and/or modify
  14 it under the terms of the GNU General Public License as published by
  15 the Free Software Foundation, either version 3 of the License, or (at
  16 your option) any later version.
  17
  18 GNU Emacs is distributed in the hope that it will be useful,
  19 but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 GNU General Public License for more details.
  22
  23 You should have received a copy of the GNU General Public License
  24 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  25
  26 #ifndef EMACS_CODING_H
  27 #define EMACS_CODING_H
  28
  29 #include "lisp.h"
  30
  31 /* Index to arguments of Fdefine_coding_system_internal.  */
  32
  33 enum define_coding_system_arg_index
  34   {
  35     coding_arg_name,
  36     coding_arg_mnemonic,
  37     coding_arg_coding_type,
  38     coding_arg_charset_list,
  39     coding_arg_ascii_compatible_p,
  40     coding_arg_decode_translation_table,
  41     coding_arg_encode_translation_table,
  42     coding_arg_post_read_conversion,
  43     coding_arg_pre_write_conversion,
  44     coding_arg_default_char,
  45     coding_arg_for_unibyte,
  46     coding_arg_plist,
  47     coding_arg_eol_type,
  48     coding_arg_max
  49   };
  50
  51 enum define_coding_iso2022_arg_index
  52   {
  53     coding_arg_iso2022_initial = coding_arg_max,
  54     coding_arg_iso2022_reg_usage,
  55     coding_arg_iso2022_request,
  56     coding_arg_iso2022_flags,
  57     coding_arg_iso2022_max
  58   };
  59
  60 enum define_coding_utf8_arg_index
  61   {
  62     coding_arg_utf8_bom = coding_arg_max,
  63     coding_arg_utf8_max
  64   };
  65
  66 enum define_coding_utf16_arg_index
  67   {
  68     coding_arg_utf16_bom = coding_arg_max,
  69     coding_arg_utf16_endian,
  70     coding_arg_utf16_max
  71   };
  72
  73 enum define_coding_ccl_arg_index
  74   {
  75     coding_arg_ccl_decoder = coding_arg_max,
  76     coding_arg_ccl_encoder,
  77     coding_arg_ccl_valids,
  78     coding_arg_ccl_max
  79   };
  80
  81 enum define_coding_undecided_arg_index
  82   {
  83     coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max,
  84     coding_arg_undecided_inhibit_iso_escape_detection,
  85     coding_arg_undecided_prefer_utf_8,
  86     coding_arg_undecided_max
  87   };
  88
  89 /* Hash table for all coding systems.  Keys are coding system symbols
  90    and values are spec vectors of the corresponding coding system.  A
  91    spec vector has the form [ ATTRS ALIASES EOL-TYPE ].  ATTRS is a
  92    vector of attributes of the coding system.  ALIASES is a list of
  93    aliases (symbols) of the coding system.  EOL-TYPE is `unix', `dos',
  94    `mac' or a vector of coding systems (symbols).  */
  95
  96 extern Lisp_Object Vcoding_system_hash_table;
  97
  98
  99 /* Enumeration of coding system type.  */
 100
 101 enum coding_system_type
 102   {
 103     coding_type_charset,
 104     coding_type_utf_8,
 105     coding_type_utf_16,
 106     coding_type_iso_2022,
 107     coding_type_emacs_mule,
 108     coding_type_sjis,
 109     coding_type_ccl,
 110     coding_type_raw_text,
 111     coding_type_undecided,
 112     coding_type_max
 113   };
 114
 115
 116 /* Enumeration of end-of-line format type.  */
 117
 118 enum end_of_line_type
 119   {
 120     eol_lf,             /* Line-feed only, same as Emacs' internal
 121                            format.  */
 122     eol_crlf,           /* Sequence of carriage-return and
 123                            line-feed.  */
 124     eol_cr,             /* Carriage-return only.  */
 125     eol_any,            /* Accept any of above.  Produce line-feed
 126                            only.  */
 127     eol_undecided,      /* This value is used to denote that the
 128                            eol-type is not yet undecided.  */
 129     eol_type_max
 130   };
 131
 132 /* Enumeration of index to an attribute vector of a coding system.  */
 133
 134 enum coding_attr_index
 135   {
 136     coding_attr_base_name,
 137     coding_attr_docstring,
 138     coding_attr_mnemonic,
 139     coding_attr_type,
 140     coding_attr_charset_list,
 141     coding_attr_ascii_compat,
 142     coding_attr_decode_tbl,
 143     coding_attr_encode_tbl,
 144     coding_attr_trans_tbl,
 145     coding_attr_post_read,
 146     coding_attr_pre_write,
 147     coding_attr_default_char,
 148     coding_attr_for_unibyte,
 149     coding_attr_plist,
 150
 151     coding_attr_category,
 152     coding_attr_safe_charsets,
 153
 154     /* The followings are extra attributes for each type.  */
 155     coding_attr_charset_valids,
 156
 157     coding_attr_ccl_decoder,
 158     coding_attr_ccl_encoder,
 159     coding_attr_ccl_valids,
 160
 161     coding_attr_iso_initial,
 162     coding_attr_iso_usage,
 163     coding_attr_iso_request,
 164     coding_attr_iso_flags,
 165
 166     coding_attr_utf_bom,
 167     coding_attr_utf_16_endian,
 168
 169     coding_attr_emacs_mule_full,
 170
 171     coding_attr_undecided_inhibit_null_byte_detection,
 172     coding_attr_undecided_inhibit_iso_escape_detection,
 173     coding_attr_undecided_prefer_utf_8,
 174
 175     coding_attr_last_index
 176   };
 177
 178
 179 /* Macros to access an element of an attribute vector.  */
 180
 181 #define CODING_ATTR_BASE_NAME(attrs)    AREF (attrs, coding_attr_base_name)
 182 #define CODING_ATTR_TYPE(attrs)         AREF (attrs, coding_attr_type)
 183 #define CODING_ATTR_CHARSET_LIST(attrs) AREF (attrs, coding_attr_charset_list)
 184 #define CODING_ATTR_MNEMONIC(attrs)     AREF (attrs, coding_attr_mnemonic)
 185 #define CODING_ATTR_DOCSTRING(attrs)    AREF (attrs, coding_attr_docstring)
 186 #define CODING_ATTR_ASCII_COMPAT(attrs) AREF (attrs, coding_attr_ascii_compat)
 187 #define CODING_ATTR_DECODE_TBL(attrs)   AREF (attrs, coding_attr_decode_tbl)
 188 #define CODING_ATTR_ENCODE_TBL(attrs)   AREF (attrs, coding_attr_encode_tbl)
 189 #define CODING_ATTR_TRANS_TBL(attrs)    AREF (attrs, coding_attr_trans_tbl)
 190 #define CODING_ATTR_POST_READ(attrs)    AREF (attrs, coding_attr_post_read)
 191 #define CODING_ATTR_PRE_WRITE(attrs)    AREF (attrs, coding_attr_pre_write)
 192 #define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char)
 193 #define CODING_ATTR_FOR_UNIBYTE(attrs)  AREF (attrs, coding_attr_for_unibyte)
 194 #define CODING_ATTR_PLIST(attrs)        AREF (attrs, coding_attr_plist)
 195 #define CODING_ATTR_CATEGORY(attrs)     AREF (attrs, coding_attr_category)
 196 #define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets)
 197
 198
 199 /* Return the name of a coding system specified by ID.  */
 200 #define CODING_ID_NAME(id) \
 201   (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))
 202
 203 /* Return the attribute vector of a coding system specified by ID.  */
 204
 205 #define CODING_ID_ATTRS(id)     \
 206   (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))
 207
 208 /* Return the list of aliases of a coding system specified by ID.  */
 209
 210 #define CODING_ID_ALIASES(id)   \
 211   (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))
 212
 213 /* Return the eol-type of a coding system specified by ID.  */
 214
 215 #define CODING_ID_EOL_TYPE(id)  \
 216   (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
 217
 218
 219 /* Return the spec vector of CODING_SYSTEM_SYMBOL.  */
 220
 221 #define CODING_SYSTEM_SPEC(coding_system_symbol)        \
 222   (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))
 223
 224
 225 /* Return the ID of CODING_SYSTEM_SYMBOL.  */
 226
 227 #define CODING_SYSTEM_ID(coding_system_symbol)                  \
 228   hash_lookup (XHASH_TABLE (Vcoding_system_hash_table),         \
 229                coding_system_symbol, NULL)
 230
 231 /* Return true if CODING_SYSTEM_SYMBOL is a coding system.  */
 232
 233 #define CODING_SYSTEM_P(coding_system_symbol)           \
 234   (CODING_SYSTEM_ID (coding_system_symbol) >= 0         \
 235    || (! NILP (coding_system_symbol)                    \
 236        && ! NILP (Fcoding_system_p (coding_system_symbol))))
 237
 238 /* Check if X is a coding system or not.  */
 239
 240 #define CHECK_CODING_SYSTEM(x)                          \
 241   do {                                                  \
 242     if (CODING_SYSTEM_ID (x) < 0                        \
 243         && NILP (Fcheck_coding_system (x)))             \
 244       wrong_type_argument (Qcoding_system_p, (x));      \
 245   } while (false)
 246
 247
 248 /* Check if X is a coding system or not.  If it is, set SEPC to the
 249    spec vector of the coding system.  */
 250
 251 #define CHECK_CODING_SYSTEM_GET_SPEC(x, spec)           \
 252   do {                                                  \
 253     spec = CODING_SYSTEM_SPEC (x);                      \
 254     if (NILP (spec))                                    \
 255       {                                                 \
 256         Fcheck_coding_system (x);                       \
 257         spec = CODING_SYSTEM_SPEC (x);                  \
 258       }                                                 \
 259     if (NILP (spec))                                    \
 260       wrong_type_argument (Qcoding_system_p, (x));      \
 261   } while (false)
 262
 263
 264 /* Check if X is a coding system or not.  If it is, set ID to the
 265    ID of the coding system.  */
 266
 267 #define CHECK_CODING_SYSTEM_GET_ID(x, id)                       \
 268   do                                                            \
 269     {                                                           \
 270       id = CODING_SYSTEM_ID (x);                                \
 271       if (id < 0)                                               \
 272         {                                                       \
 273           Fcheck_coding_system (x);                             \
 274           id = CODING_SYSTEM_ID (x);                            \
 275         }                                                       \
 276       if (id < 0)                                               \
 277         wrong_type_argument (Qcoding_system_p, (x));    \
 278     } while (false)
 279
 280
 281 /*** GENERAL section ***/
 282
 283 /* Enumeration of result code of code conversion.  */
 284 enum coding_result_code
 285   {
 286     CODING_RESULT_SUCCESS,
 287     CODING_RESULT_INSUFFICIENT_SRC,
 288     CODING_RESULT_INSUFFICIENT_DST,
 289     CODING_RESULT_INVALID_SRC,
 290     CODING_RESULT_INTERRUPT
 291   };
 292
 293
 294 /* Macros used for the member `mode' of the struct coding_system.  */
 295
 296 /* If set, the decoding/encoding routines treat the current data as
 297    the last block of the whole text to be converted, and do the
 298    appropriate finishing job.  */
 299 #define CODING_MODE_LAST_BLOCK                  0x01
 300
 301 /* If set, it means that the current source text is in a buffer which
 302    enables selective display.  */
 303 #define CODING_MODE_SELECTIVE_DISPLAY           0x02
 304
 305 /* This flag is used by the decoding/encoding routines on the fly.  If
 306    set, it means that right-to-left text is being processed.  */
 307 #define CODING_MODE_DIRECTION                   0x04
 308
 309 #define CODING_MODE_FIXED_DESTINATION           0x08
 310
 311 /* If set, it means that the encoding routines produces some safe
 312    ASCII characters (usually '?') for unsupported characters.  */
 313 #define CODING_MODE_SAFE_ENCODING               0x10
 314
 315   /* For handling composition sequence.  */
 316 #include "composite.h"
 317
 318 enum composition_state
 319   {
 320     COMPOSING_NO,
 321     COMPOSING_CHAR,
 322     COMPOSING_RULE,
 323     COMPOSING_COMPONENT_CHAR,
 324     COMPOSING_COMPONENT_RULE
 325   };
 326
 327 /* Structure for the current composition status.  */
 328 struct composition_status
 329 {
 330   enum composition_state state;
 331   enum composition_method method;
 332   bool old_form;          /* true if pre-21 form */
 333   int length;             /* number of elements produced in charbuf */
 334   int nchars;             /* number of characters composed */
 335   int ncomps;             /* number of composition components */
 336   /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
 337      See the comment in coding.c.  */
 338   int carryover[4               /* annotation header */
 339                 + MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
 340                 + 2                                  /* intermediate -1 -1 */
 341                 + MAX_COMPOSITION_COMPONENTS         /* CHARs */
 342                 ];
 343 };
 344
 345
 346 /* Structure of the field `spec.iso_2022' in the structure
 347    `coding_system'.  */
 348 struct iso_2022_spec
 349 {
 350   /* Bit-wise-or of CODING_ISO_FLAG_XXX.  */
 351   unsigned flags;
 352
 353   /* The current graphic register invoked to each graphic plane.  */
 354   int current_invocation[2];
 355
 356   /* The current charset designated to each graphic register.  The
 357      value -1 means that not charset is designated, -2 means that
 358      there was an invalid designation previously.  */
 359   int current_designation[4];
 360
 361   /* If positive, we are now scanning CTEXT extended segment.  */
 362   int ctext_extended_segment_len;
 363
 364   /* True temporarily only when graphic register 2 or 3 is invoked by
 365      single-shift while encoding.  */
 366   bool_bf single_shifting : 1;
 367
 368   /* True temporarily only when processing at beginning of line.  */
 369   bool_bf bol : 1;
 370
 371   /* If true, we are now scanning embedded UTF-8 sequence.  */
 372   bool_bf embedded_utf_8 : 1;
 373
 374   /* The current composition.  */
 375   struct composition_status cmp_status;
 376 };
 377
 378 struct emacs_mule_spec
 379 {
 380   struct composition_status cmp_status;
 381 };
 382
 383 struct undecided_spec
 384 {
 385   /* Inhibit null byte detection.  1 means always inhibit,
 386      -1 means do not inhibit, 0 means rely on user variable.  */
 387   int inhibit_nbd;
 388
 389   /* Inhibit ISO escape detection.  -1, 0, 1 as above.  */
 390   int inhibit_ied;
 391
 392   /* Prefer UTF-8 when the input could be other encodings.  */
 393   bool prefer_utf_8;
 394 };
 395
 396 enum utf_bom_type
 397   {
 398     utf_detect_bom,
 399     utf_without_bom,
 400     utf_with_bom
 401   };
 402
 403 enum utf_16_endian_type
 404   {
 405     utf_16_big_endian,
 406     utf_16_little_endian
 407   };
 408
 409 struct utf_16_spec
 410 {
 411   enum utf_bom_type bom;
 412   enum utf_16_endian_type endian;
 413   int surrogate;
 414 };
 415
 416 struct coding_detection_info
 417 {
 418   /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs.  */
 419   /* Which categories are already checked.  */
 420   int checked;
 421   /* Which categories are strongly found.  */
 422   int found;
 423   /* Which categories are rejected.  */
 424   int rejected;
 425 };
 426
 427
 428 struct coding_system
 429 {
 430   /* ID number of the coding system.  This is an index to
 431      Vcoding_system_hash_table.  This value is set by
 432      setup_coding_system.  At the early stage of building time, this
 433      value is -1 in the array coding_categories to indicate that no
 434      coding-system of that category is yet defined.  */
 435   ptrdiff_t id;
 436
 437   /* Flag bits of the coding system.  The meaning of each bit is common
 438      to all types of coding systems.  */
 439   unsigned common_flags : 14;
 440
 441   /* Mode bits of the coding system.  See the comments of the macros
 442      CODING_MODE_XXX.  */
 443   unsigned mode : 5;
 444
 445   /* The following two members specify how binary 8-bit code 128..255
 446      are represented in source and destination text respectively.  True
 447      means they are represented by 2-byte sequence, false means they are
 448      represented by 1-byte as is (see the comment in character.h).  */
 449   bool_bf src_multibyte : 1;
 450   bool_bf dst_multibyte : 1;
 451
 452   /* True if the source of conversion is not in the member
 453      `charbuf', but at `src_object'.  */
 454   bool_bf chars_at_source : 1;
 455
 456   /* Nonzero if the result of conversion is in `destination'
 457      buffer rather than in `dst_object'.  */
 458   bool_bf raw_destination : 1;
 459
 460   /* Set to true if charbuf contains an annotation.  */
 461   bool_bf annotated : 1;
 462
 463   /* Used internally in coding.c.  See the comment of detect_ascii.  */
 464   unsigned eol_seen : 3;
 465
 466   /* Finish status of code conversion.  */
 467   ENUM_BF (coding_result_code) result : 3;
 468
 469   int max_charset_id;
 470
 471   /* Detailed information specific to each type of coding system.  */
 472   union
 473     {
 474       struct iso_2022_spec iso_2022;
 475       struct ccl_spec *ccl;     /* Defined in ccl.h.  */
 476       struct utf_16_spec utf_16;
 477       enum utf_bom_type utf_8_bom;
 478       struct emacs_mule_spec emacs_mule;
 479       struct undecided_spec undecided;
 480     } spec;
 481
 482   unsigned char *safe_charsets;
 483
 484   /* How may heading bytes we can skip for decoding.  This is set to
 485      -1 in setup_coding_system, and updated by detect_coding.  So,
 486      when this is equal to the byte length of the text being
 487      converted, we can skip the actual conversion process except for
 488      the eol format.  */
 489   ptrdiff_t head_ascii;
 490
 491   /* How many bytes/chars at the source are detected as valid utf-8
 492      sequence.  Set by detect_coding_utf_8.  */
 493   ptrdiff_t detected_utf8_bytes, detected_utf8_chars;
 494
 495   /* The following members are set by encoding/decoding routine.  */
 496   ptrdiff_t produced, produced_char, consumed, consumed_char;
 497
 498   ptrdiff_t src_pos, src_pos_byte, src_chars, src_bytes;
 499   Lisp_Object src_object;
 500   const unsigned char *source;
 501
 502   ptrdiff_t dst_pos, dst_pos_byte, dst_bytes;
 503   Lisp_Object dst_object;
 504   unsigned char *destination;
 505
 506   /* If an element is non-negative, it is a character code.
 507
 508      If it is in the range -128..-1, it is a 8-bit character code
 509      minus 256.
 510
 511      If it is less than -128, it specifies the start of an annotation
 512      chunk.  The length of the chunk is -128 minus the value of the
 513      element.  The following elements are OFFSET, ANNOTATION-TYPE, and
 514      a sequence of actual data for the annotation.  OFFSET is a
 515      character position offset from dst_pos or src_pos,
 516      ANNOTATION-TYPE specifies the meaning of the annotation and how to
 517      handle the following data..  */
 518   int *charbuf;
 519   int charbuf_size, charbuf_used;
 520
 521   unsigned char carryover[64];
 522   int carryover_bytes;
 523
 524   int default_char;
 525
 526   bool (*detector) (struct coding_system *, struct coding_detection_info *);
 527   void (*decoder) (struct coding_system *);
 528   bool (*encoder) (struct coding_system *);
 529 };
 530
 531 /* Meanings of bits in the member `common_flags' of the structure
 532    coding_system.  The lowest 8 bits are reserved for various kind of
 533    annotations (currently two of them are used).  */
 534 #define CODING_ANNOTATION_MASK                  0x00FF
 535 #define CODING_ANNOTATE_COMPOSITION_MASK        0x0001
 536 #define CODING_ANNOTATE_DIRECTION_MASK          0x0002
 537 #define CODING_ANNOTATE_CHARSET_MASK            0x0003
 538 #define CODING_FOR_UNIBYTE_MASK                 0x0100
 539 #define CODING_REQUIRE_FLUSHING_MASK            0x0200
 540 #define CODING_REQUIRE_DECODING_MASK            0x0400
 541 #define CODING_REQUIRE_ENCODING_MASK            0x0800
 542 #define CODING_REQUIRE_DETECTION_MASK           0x1000
 543 #define CODING_RESET_AT_BOL_MASK                0x2000
 544
 545 /* Return nonzero if the coding context CODING requires annotation
 546    handling.  */
 547 #define CODING_REQUIRE_ANNOTATION(coding) \
 548   ((coding)->common_flags & CODING_ANNOTATION_MASK)
 549
 550 /* Return nonzero if the coding context CODING prefers decoding into
 551    unibyte.  */
 552 #define CODING_FOR_UNIBYTE(coding) \
 553   ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)
 554
 555 /* Return nonzero if the coding context CODING requires specific code to be
 556    attached at the tail of converted text.  */
 557 #define CODING_REQUIRE_FLUSHING(coding) \
 558   ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
 559
 560 /* Return nonzero if the coding context CODING requires code conversion on
 561    decoding.  */
 562 #define CODING_REQUIRE_DECODING(coding) \
 563   ((coding)->dst_multibyte              \
 564    || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
 565
 566
 567 /* Return nonzero if the coding context CODING requires code conversion on
 568    encoding.
 569    The non-multibyte part of the condition is to support encoding of
 570    unibyte strings/buffers generated by string-as-unibyte or
 571    (set-buffer-multibyte nil) from multibyte strings/buffers.  */
 572 #define CODING_REQUIRE_ENCODING(coding)                         \
 573   ((coding)->src_multibyte                                      \
 574    || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK     \
 575    || (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY)
 576
 577
 578 /* Return nonzero if the coding context CODING requires some kind of code
 579    detection.  */
 580 #define CODING_REQUIRE_DETECTION(coding) \
 581   ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
 582
 583 /* Return nonzero if the coding context CODING requires code conversion on
 584    decoding or some kind of code detection.  */
 585 #define CODING_MAY_REQUIRE_DECODING(coding)     \
 586   (CODING_REQUIRE_DECODING (coding)             \
 587    || CODING_REQUIRE_DETECTION (coding))
 588
 589 /* Macros to decode or encode a character of JISX0208 in SJIS.  S1 and
 590    S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
 591    system.  C1 and C2 are the 1st and 2nd position codes of Emacs'
 592    internal format.  */
 593
 594 #define SJIS_TO_JIS(code)                               \
 595   do {                                                  \
 596     int s1, s2, j1, j2;                                 \
 597                                                         \
 598     s1 = (code) >> 8, s2 = (code) & 0xFF;               \
 599                                                         \
 600     if (s2 >= 0x9F)                                     \
 601       (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0),       \
 602        j2 = s2 - 0x7E);                                 \
 603     else                                                \
 604       (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1),     \
 605        j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F));         \
 606     (code) = (j1 << 8) | j2;                            \
 607   } while (false)
 608
 609 #define SJIS_TO_JIS2(code)                              \
 610   do {                                                  \
 611     int s1, s2, j1, j2;                                 \
 612                                                         \
 613     s1 = (code) >> 8, s2 = (code) & 0xFF;               \
 614                                                         \
 615     if (s2 >= 0x9F)                                     \
 616       {                                                 \
 617         j1 = (s1 == 0xF0 ? 0x28                         \
 618               : s1 == 0xF1 ? 0x24                       \
 619               : s1 == 0xF2 ? 0x2C                       \
 620               : s1 == 0xF3 ? 0x2E                       \
 621               : 0x6E + (s1 - 0xF4) * 2);                \
 622         j2 = s2 - 0x7E;                                 \
 623       }                                                 \
 624     else                                                \
 625       {                                                 \
 626         j1 = (s1 <= 0xF2 ? 0x21 + (s1 - 0xF0) * 2       \
 627               : s1 <= 0xF4 ? 0x2D + (s1 - 0xF3) * 2     \
 628               : 0x6F + (s1 - 0xF5) * 2);                \
 629         j2 = s2 - ((s2 >= 0x7F ? 0x20 : 0x1F));         \
 630       }                                                 \
 631     (code) = (j1 << 8) | j2;                            \
 632   } while (false)
 633
 634
 635 #define JIS_TO_SJIS(code)                               \
 636   do {                                                  \
 637     int s1, s2, j1, j2;                                 \
 638                                                         \
 639     j1 = (code) >> 8, j2 = (code) & 0xFF;               \
 640     if (j1 & 1)                                         \
 641       (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1),       \
 642        s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F));         \
 643     else                                                \
 644       (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0),       \
 645        s2 = j2 + 0x7E);                                 \
 646     (code) = (s1 << 8) | s2;                            \
 647   } while (false)
 648
 649 #define JIS_TO_SJIS2(code)                              \
 650   do {                                                  \
 651     int s1, s2, j1, j2;                                 \
 652                                                         \
 653     j1 = (code) >> 8, j2 = (code) & 0xFF;               \
 654     if (j1 & 1)                                         \
 655       {                                                 \
 656         s1 = (j1 <= 0x25 ? 0xF0 + (j1 - 0x21) / 2       \
 657               : j1 <= 0x2F ? 0xF3 + (j1 - 0x2D) / 2     \
 658               : 0xF5 + (j1 - 0x6F) / 2);                \
 659         s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F);         \
 660       }                                                 \
 661     else                                                \
 662       {                                                 \
 663         s1 = (j1 == 0x28 ? 0xF0                         \
 664               : j1 == 0x24 ? 0xF1                       \
 665               : j1 == 0x2C ? 0xF2                       \
 666               : j1 == 0x2E ? 0xF3                       \
 667               : 0xF4 + (j1 - 0x6E) / 2);                \
 668         s2 = j2 + 0x7E;                                 \
 669       }                                                 \
 670     (code) = (s1 << 8) | s2;                            \
 671   } while (false)
 672
 673 /* Encode the file name NAME using the specified coding system
 674    for file names, if any.  */
 675 #define ENCODE_FILE(NAME)  encode_file_name (NAME)
 676
 677 /* Decode the file name NAME using the specified coding system
 678    for file names, if any.  */
 679 #define DECODE_FILE(NAME)  decode_file_name (NAME)
 680
 681 /* Encode the string STR using the specified coding system
 682    for system functions, if any.  */
 683 #define ENCODE_SYSTEM(str)                                                 \
 684   (! NILP (Vlocale_coding_system)                                          \
 685    ? code_convert_string_norecord (str, Vlocale_coding_system, true)       \
 686    : str)
 687
 688 /* Decode the string STR using the specified coding system
 689    for system functions, if any.  */
 690 #define DECODE_SYSTEM(str)                                                 \
 691   (! NILP (Vlocale_coding_system)                                          \
 692    ? code_convert_string_norecord (str, Vlocale_coding_system, false)      \
 693    : str)
 694
 695 /* Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op.  */
 696 #define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, true)
 697
 698 /* Extern declarations.  */
 699 extern Lisp_Object code_conversion_save (bool, bool);
 700 extern void setup_coding_system (Lisp_Object, struct coding_system *);
 701 extern Lisp_Object coding_charset_list (struct coding_system *);
 702 extern Lisp_Object coding_system_charset_list (Lisp_Object);
 703 extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
 704                                         Lisp_Object, bool, bool, bool);
 705 extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
 706                                                  bool);
 707 extern Lisp_Object encode_file_name (Lisp_Object);
 708 extern Lisp_Object decode_file_name (Lisp_Object);
 709 extern Lisp_Object raw_text_coding_system (Lisp_Object);
 710 extern bool raw_text_coding_system_p (struct coding_system *);
 711 extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
 712 extern Lisp_Object complement_process_encoding_system (Lisp_Object);
 713
 714 extern void decode_coding_gap (struct coding_system *,
 715                                ptrdiff_t, ptrdiff_t);
 716 extern void decode_coding_object (struct coding_system *,
 717                                   Lisp_Object, ptrdiff_t, ptrdiff_t,
 718                                   ptrdiff_t, ptrdiff_t, Lisp_Object);
 719 extern void encode_coding_object (struct coding_system *,
 720                                   Lisp_Object, ptrdiff_t, ptrdiff_t,
 721                                   ptrdiff_t, ptrdiff_t, Lisp_Object);
 722
 723 #if defined (WINDOWSNT) || defined (CYGWIN)
 724
 725 /* These functions use Lisp string objects to store the UTF-16LE
 726    strings that modern versions of Windows expect.  These strings are
 727    not particularly useful to Lisp, and all Lisp strings should be
 728    native Emacs multibyte.  */
 729
 730 /* Access the wide-character string stored in a Lisp string object.  */
 731 #define WCSDATA(x) ((wchar_t *) SDATA (x))
 732
 733 /* Convert the multi-byte string in STR to UTF-16LE encoded unibyte
 734    string, and store it in *BUF.  BUF may safely point to STR on entry.  */
 735 extern wchar_t *to_unicode (Lisp_Object str, Lisp_Object *buf);
 736
 737 /* Convert STR, a UTF-16LE encoded string embedded in a unibyte string
 738    object, to a multi-byte Emacs string and return it.  This function
 739    calls code_convert_string_norecord internally and has all its
 740    failure modes.  STR itself is not modified.  */
 741 extern Lisp_Object from_unicode (Lisp_Object str);
 742
 743 /* Convert WSTR to an Emacs string.  */
 744 extern Lisp_Object from_unicode_buffer (const wchar_t *wstr);
 745
 746 #endif /* WINDOWSNT || CYGWIN */
 747
 748 /* Macros for backward compatibility.  */
 749
 750 #define encode_coding_string(coding, string, nocopy)                    \
 751   (STRING_MULTIBYTE(string) ?                                           \
 752     (encode_coding_object (coding, string, 0, 0, SCHARS (string),       \
 753                            SBYTES (string), Qt),                        \
 754      (coding)->dst_object) : (string))
 755
 756
 757 #define decode_coding_c_string(coding, src, bytes, dst_object)          \
 758   do {                                                                  \
 759     (coding)->source = (src);                                           \
 760     (coding)->src_chars = (coding)->src_bytes = (bytes);                \
 761     decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes),       \
 762                           (dst_object));                                \
 763   } while (false)
 764
 765
 766 extern Lisp_Object preferred_coding_system (void);
 767
 768
 769 #ifdef emacs
 770
 771 extern char *emacs_strerror (int);
 772
 773 /* Coding system to be used to encode text for terminal display when
 774    terminal coding system is nil.  */
 775 extern struct coding_system safe_terminal_coding;
 776
 777 #endif
 778
 779 extern char emacs_mule_bytes[256];
 780
 781 #endif /* EMACS_CODING_H */