src/coding.h

   1 /* Header for coding system handler.
   2    Copyright (C) 2001-2015 Free Software Foundation, Inc.
   3    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4      2005, 2006, 2007, 2008, 2009, 2010, 2011
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H14PRO021
   7    Copyright (C) 2003
   8      National Institute of Advanced Industrial Science and Technology (AIST)
   9      Registration Number H13PRO009
  10
  11 This file is part of GNU Emacs.
  12
  13 GNU Emacs is free software: you can redistribute it and/or modify
  14 it under the terms of the GNU General Public License as published by
  15 the Free Software Foundation, either version 3 of the License, or
  16 (at your option) any later version.
  17
  18 GNU Emacs is distributed in the hope that it will be useful,
  19 but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 GNU General Public License for more details.
  22
  23 You should have received a copy of the GNU General Public License
  24 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  25
  26 #ifndef EMACS_CODING_H
  27 #define EMACS_CODING_H
  28
  29 /* Index to arguments of Fdefine_coding_system_internal.  */
  30
  31 enum define_coding_system_arg_index
  32   {
  33     coding_arg_name,
  34     coding_arg_mnemonic,
  35     coding_arg_coding_type,
  36     coding_arg_charset_list,
  37     coding_arg_ascii_compatible_p,
  38     coding_arg_decode_translation_table,
  39     coding_arg_encode_translation_table,
  40     coding_arg_post_read_conversion,
  41     coding_arg_pre_write_conversion,
  42     coding_arg_default_char,
  43     coding_arg_for_unibyte,
  44     coding_arg_plist,
  45     coding_arg_eol_type,
  46     coding_arg_max
  47   };
  48
  49 enum define_coding_iso2022_arg_index
  50   {
  51     coding_arg_iso2022_initial = coding_arg_max,
  52     coding_arg_iso2022_reg_usage,
  53     coding_arg_iso2022_request,
  54     coding_arg_iso2022_flags,
  55     coding_arg_iso2022_max
  56   };
  57
  58 enum define_coding_utf8_arg_index
  59   {
  60     coding_arg_utf8_bom = coding_arg_max,
  61     coding_arg_utf8_max
  62   };
  63
  64 enum define_coding_utf16_arg_index
  65   {
  66     coding_arg_utf16_bom = coding_arg_max,
  67     coding_arg_utf16_endian,
  68     coding_arg_utf16_max
  69   };
  70
  71 enum define_coding_ccl_arg_index
  72   {
  73     coding_arg_ccl_decoder = coding_arg_max,
  74     coding_arg_ccl_encoder,
  75     coding_arg_ccl_valids,
  76     coding_arg_ccl_max
  77   };
  78
  79 enum define_coding_undecided_arg_index
  80   {
  81     coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max,
  82     coding_arg_undecided_inhibit_iso_escape_detection,
  83     coding_arg_undecided_prefer_utf_8,
  84     coding_arg_undecided_max
  85   };
  86
  87 /* Hash table for all coding systems.  Keys are coding system symbols
  88    and values are spec vectors of the corresponding coding system.  A
  89    spec vector has the form [ ATTRS ALIASES EOL-TYPE ].  ATTRS is a
  90    vector of attribute of the coding system.  ALIASES is a list of
  91    aliases (symbols) of the coding system.  EOL-TYPE is `unix', `dos',
  92    `mac' or a vector of coding systems (symbols).  */
  93
  94 extern Lisp_Object Vcoding_system_hash_table;
  95
  96
  97 /* Enumeration of coding system type.  */
  98
  99 enum coding_system_type
 100   {
 101     coding_type_charset,
 102     coding_type_utf_8,
 103     coding_type_utf_16,
 104     coding_type_iso_2022,
 105     coding_type_emacs_mule,
 106     coding_type_sjis,
 107     coding_type_ccl,
 108     coding_type_raw_text,
 109     coding_type_undecided,
 110     coding_type_max
 111   };
 112
 113
 114 /* Enumeration of end-of-line format type.  */
 115
 116 enum end_of_line_type
 117   {
 118     eol_lf,             /* Line-feed only, same as Emacs' internal
 119                            format.  */
 120     eol_crlf,           /* Sequence of carriage-return and
 121                            line-feed.  */
 122     eol_cr,             /* Carriage-return only.  */
 123     eol_any,            /* Accept any of above.  Produce line-feed
 124                            only.  */
 125     eol_undecided,      /* This value is used to denote that the
 126                            eol-type is not yet undecided.  */
 127     eol_type_max
 128   };
 129
 130 /* Enumeration of index to an attribute vector of a coding system.  */
 131
 132 enum coding_attr_index
 133   {
 134     coding_attr_base_name,
 135     coding_attr_docstring,
 136     coding_attr_mnemonic,
 137     coding_attr_type,
 138     coding_attr_charset_list,
 139     coding_attr_ascii_compat,
 140     coding_attr_decode_tbl,
 141     coding_attr_encode_tbl,
 142     coding_attr_trans_tbl,
 143     coding_attr_post_read,
 144     coding_attr_pre_write,
 145     coding_attr_default_char,
 146     coding_attr_for_unibyte,
 147     coding_attr_plist,
 148
 149     coding_attr_category,
 150     coding_attr_safe_charsets,
 151
 152     /* The followings are extra attributes for each type.  */
 153     coding_attr_charset_valids,
 154
 155     coding_attr_ccl_decoder,
 156     coding_attr_ccl_encoder,
 157     coding_attr_ccl_valids,
 158
 159     coding_attr_iso_initial,
 160     coding_attr_iso_usage,
 161     coding_attr_iso_request,
 162     coding_attr_iso_flags,
 163
 164     coding_attr_utf_bom,
 165     coding_attr_utf_16_endian,
 166
 167     coding_attr_emacs_mule_full,
 168
 169     coding_attr_undecided_inhibit_null_byte_detection,
 170     coding_attr_undecided_inhibit_iso_escape_detection,
 171     coding_attr_undecided_prefer_utf_8,
 172
 173     coding_attr_last_index
 174   };
 175
 176
 177 /* Macros to access an element of an attribute vector.  */
 178
 179 #define CODING_ATTR_BASE_NAME(attrs)    AREF (attrs, coding_attr_base_name)
 180 #define CODING_ATTR_TYPE(attrs)         AREF (attrs, coding_attr_type)
 181 #define CODING_ATTR_CHARSET_LIST(attrs) AREF (attrs, coding_attr_charset_list)
 182 #define CODING_ATTR_MNEMONIC(attrs)     AREF (attrs, coding_attr_mnemonic)
 183 #define CODING_ATTR_DOCSTRING(attrs)    AREF (attrs, coding_attr_docstring)
 184 #define CODING_ATTR_ASCII_COMPAT(attrs) AREF (attrs, coding_attr_ascii_compat)
 185 #define CODING_ATTR_DECODE_TBL(attrs)   AREF (attrs, coding_attr_decode_tbl)
 186 #define CODING_ATTR_ENCODE_TBL(attrs)   AREF (attrs, coding_attr_encode_tbl)
 187 #define CODING_ATTR_TRANS_TBL(attrs)    AREF (attrs, coding_attr_trans_tbl)
 188 #define CODING_ATTR_POST_READ(attrs)    AREF (attrs, coding_attr_post_read)
 189 #define CODING_ATTR_PRE_WRITE(attrs)    AREF (attrs, coding_attr_pre_write)
 190 #define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char)
 191 #define CODING_ATTR_FOR_UNIBYTE(attrs)  AREF (attrs, coding_attr_for_unibyte)
 192 #define CODING_ATTR_PLIST(attrs)        AREF (attrs, coding_attr_plist)
 193 #define CODING_ATTR_CATEGORY(attrs)     AREF (attrs, coding_attr_category)
 194 #define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets)
 195
 196
 197 /* Return the name of a coding system specified by ID.  */
 198 #define CODING_ID_NAME(id) \
 199   (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))
 200
 201 /* Return the attribute vector of a coding system specified by ID.  */
 202
 203 #define CODING_ID_ATTRS(id)     \
 204   (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))
 205
 206 /* Return the list of aliases of a coding system specified by ID.  */
 207
 208 #define CODING_ID_ALIASES(id)   \
 209   (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))
 210
 211 /* Return the eol-type of a coding system specified by ID.  */
 212
 213 #define CODING_ID_EOL_TYPE(id)  \
 214   (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
 215
 216
 217 /* Return the spec vector of CODING_SYSTEM_SYMBOL.  */
 218
 219 #define CODING_SYSTEM_SPEC(coding_system_symbol)        \
 220   (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))
 221
 222
 223 /* Return the ID of CODING_SYSTEM_SYMBOL.  */
 224
 225 #define CODING_SYSTEM_ID(coding_system_symbol)                  \
 226   hash_lookup (XHASH_TABLE (Vcoding_system_hash_table),         \
 227                coding_system_symbol, NULL)
 228
 229 /* Return true if CODING_SYSTEM_SYMBOL is a coding system.  */
 230
 231 #define CODING_SYSTEM_P(coding_system_symbol)           \
 232   (CODING_SYSTEM_ID (coding_system_symbol) >= 0         \
 233    || (! NILP (coding_system_symbol)                    \
 234        && ! NILP (Fcoding_system_p (coding_system_symbol))))
 235
 236 /* Check if X is a coding system or not.  */
 237
 238 #define CHECK_CODING_SYSTEM(x)                          \
 239   do {                                                  \
 240     if (CODING_SYSTEM_ID (x) < 0                        \
 241         && NILP (Fcheck_coding_system (x)))             \
 242       wrong_type_argument (Qcoding_system_p, (x));      \
 243   } while (false)
 244
 245
 246 /* Check if X is a coding system or not.  If it is, set SEPC to the
 247    spec vector of the coding system.  */
 248
 249 #define CHECK_CODING_SYSTEM_GET_SPEC(x, spec)           \
 250   do {                                                  \
 251     spec = CODING_SYSTEM_SPEC (x);                      \
 252     if (NILP (spec))                                    \
 253       {                                                 \
 254         Fcheck_coding_system (x);                       \
 255         spec = CODING_SYSTEM_SPEC (x);                  \
 256       }                                                 \
 257     if (NILP (spec))                                    \
 258       wrong_type_argument (Qcoding_system_p, (x));      \
 259   } while (false)
 260
 261
 262 /* Check if X is a coding system or not.  If it is, set ID to the
 263    ID of the coding system.  */
 264
 265 #define CHECK_CODING_SYSTEM_GET_ID(x, id)                       \
 266   do                                                            \
 267     {                                                           \
 268       id = CODING_SYSTEM_ID (x);                                \
 269       if (id < 0)                                               \
 270         {                                                       \
 271           Fcheck_coding_system (x);                             \
 272           id = CODING_SYSTEM_ID (x);                            \
 273         }                                                       \
 274       if (id < 0)                                               \
 275         wrong_type_argument (Qcoding_system_p, (x));    \
 276     } while (false)
 277
 278
 279 /*** GENERAL section ***/
 280
 281 /* Enumeration of result code of code conversion.  */
 282 enum coding_result_code
 283   {
 284     CODING_RESULT_SUCCESS,
 285     CODING_RESULT_INSUFFICIENT_SRC,
 286     CODING_RESULT_INSUFFICIENT_DST,
 287     CODING_RESULT_INVALID_SRC,
 288     CODING_RESULT_INTERRUPT
 289   };
 290
 291
 292 /* Macros used for the member `mode' of the struct coding_system.  */
 293
 294 /* If set, the decoding/encoding routines treat the current data as
 295    the last block of the whole text to be converted, and do the
 296    appropriate finishing job.  */
 297 #define CODING_MODE_LAST_BLOCK                  0x01
 298
 299 /* If set, it means that the current source text is in a buffer which
 300    enables selective display.  */
 301 #define CODING_MODE_SELECTIVE_DISPLAY           0x02
 302
 303 /* This flag is used by the decoding/encoding routines on the fly.  If
 304    set, it means that right-to-left text is being processed.  */
 305 #define CODING_MODE_DIRECTION                   0x04
 306
 307 #define CODING_MODE_FIXED_DESTINATION           0x08
 308
 309 /* If set, it means that the encoding routines produces some safe
 310    ASCII characters (usually '?') for unsupported characters.  */
 311 #define CODING_MODE_SAFE_ENCODING               0x10
 312
 313   /* For handling composition sequence.  */
 314 #include "composite.h"
 315
 316 enum composition_state
 317   {
 318     COMPOSING_NO,
 319     COMPOSING_CHAR,
 320     COMPOSING_RULE,
 321     COMPOSING_COMPONENT_CHAR,
 322     COMPOSING_COMPONENT_RULE
 323   };
 324
 325 /* Structure for the current composition status.  */
 326 struct composition_status
 327 {
 328   enum composition_state state;
 329   enum composition_method method;
 330   bool old_form;          /* true if pre-21 form */
 331   int length;             /* number of elements produced in charbuf */
 332   int nchars;             /* number of characters composed */
 333   int ncomps;             /* number of composition components */
 334   /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
 335      See the comment in coding.c.  */
 336   int carryover[4               /* annotation header */
 337                 + MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
 338                 + 2                                  /* intermediate -1 -1 */
 339                 + MAX_COMPOSITION_COMPONENTS         /* CHARs */
 340                 ];
 341 };
 342
 343
 344 /* Structure of the field `spec.iso_2022' in the structure
 345    `coding_system'.  */
 346 struct iso_2022_spec
 347 {
 348   /* Bit-wise-or of CODING_ISO_FLAG_XXX.  */
 349   unsigned flags;
 350
 351   /* The current graphic register invoked to each graphic plane.  */
 352   int current_invocation[2];
 353
 354   /* The current charset designated to each graphic register.  The
 355      value -1 means that not charset is designated, -2 means that
 356      there was an invalid designation previously.  */
 357   int current_designation[4];
 358
 359   /* If positive, we are now scanning CTEXT extended segment.  */
 360   int ctext_extended_segment_len;
 361
 362   /* True temporarily only when graphic register 2 or 3 is invoked by
 363      single-shift while encoding.  */
 364   bool_bf single_shifting : 1;
 365
 366   /* True temporarily only when processing at beginning of line.  */
 367   bool_bf bol : 1;
 368
 369   /* If true, we are now scanning embedded UTF-8 sequence.  */
 370   bool_bf embedded_utf_8 : 1;
 371
 372   /* The current composition.  */
 373   struct composition_status cmp_status;
 374 };
 375
 376 struct emacs_mule_spec
 377 {
 378   struct composition_status cmp_status;
 379 };
 380
 381 struct undecided_spec
 382 {
 383   /* Inhibit null byte detection.  1 means always inhibit,
 384      -1 means do not inhibit, 0 means rely on user variable.  */
 385   int inhibit_nbd;
 386
 387   /* Inhibit ISO escape detection.  -1, 0, 1 as above.  */
 388   int inhibit_ied;
 389
 390   /* Prefer UTF-8 when the input could be other encodings.  */
 391   bool prefer_utf_8;
 392 };
 393
 394 enum utf_bom_type
 395   {
 396     utf_detect_bom,
 397     utf_without_bom,
 398     utf_with_bom
 399   };
 400
 401 enum utf_16_endian_type
 402   {
 403     utf_16_big_endian,
 404     utf_16_little_endian
 405   };
 406
 407 struct utf_16_spec
 408 {
 409   enum utf_bom_type bom;
 410   enum utf_16_endian_type endian;
 411   int surrogate;
 412 };
 413
 414 struct coding_detection_info
 415 {
 416   /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs.  */
 417   /* Which categories are already checked.  */
 418   int checked;
 419   /* Which categories are strongly found.  */
 420   int found;
 421   /* Which categories are rejected.  */
 422   int rejected;
 423 };
 424
 425
 426 struct coding_system
 427 {
 428   /* ID number of the coding system.  This is an index to
 429      Vcoding_system_hash_table.  This value is set by
 430      setup_coding_system.  At the early stage of building time, this
 431      value is -1 in the array coding_categories to indicate that no
 432      coding-system of that category is yet defined.  */
 433   ptrdiff_t id;
 434
 435   /* Flag bits of the coding system.  The meaning of each bit is common
 436      to all types of coding systems.  */
 437   unsigned common_flags : 14;
 438
 439   /* Mode bits of the coding system.  See the comments of the macros
 440      CODING_MODE_XXX.  */
 441   unsigned mode : 5;
 442
 443   /* The following two members specify how binary 8-bit code 128..255
 444      are represented in source and destination text respectively.  True
 445      means they are represented by 2-byte sequence, false means they are
 446      represented by 1-byte as is (see the comment in character.h).  */
 447   bool_bf src_multibyte : 1;
 448   bool_bf dst_multibyte : 1;
 449
 450   /* True if the source of conversion is not in the member
 451      `charbuf', but at `src_object'.  */
 452   bool_bf chars_at_source : 1;
 453
 454   /* Nonzero if the result of conversion is in `destination'
 455      buffer rather than in `dst_object'.  */
 456   bool_bf raw_destination : 1;
 457
 458   /* Set to true if charbuf contains an annotation.  */
 459   bool_bf annotated : 1;
 460
 461   /* Used internally in coding.c.  See the comment of detect_ascii.  */
 462   unsigned eol_seen : 3;
 463
 464   /* Finish status of code conversion.  */
 465   ENUM_BF (coding_result_code) result : 3;
 466
 467   int max_charset_id;
 468
 469   /* Detailed information specific to each type of coding system.  */
 470   union
 471     {
 472       struct iso_2022_spec iso_2022;
 473       struct ccl_spec *ccl;     /* Defined in ccl.h.  */
 474       struct utf_16_spec utf_16;
 475       enum utf_bom_type utf_8_bom;
 476       struct emacs_mule_spec emacs_mule;
 477       struct undecided_spec undecided;
 478     } spec;
 479
 480   unsigned char *safe_charsets;
 481
 482   /* How may heading bytes we can skip for decoding.  This is set to
 483      -1 in setup_coding_system, and updated by detect_coding.  So,
 484      when this is equal to the byte length of the text being
 485      converted, we can skip the actual conversion process except for
 486      the eol format.  */
 487   ptrdiff_t head_ascii;
 488
 489   /* How many bytes/chars at the source are detected as valid utf-8
 490      sequence.  Set by detect_coding_utf_8.  */
 491   ptrdiff_t detected_utf8_bytes, detected_utf8_chars;
 492
 493   /* The following members are set by encoding/decoding routine.  */
 494   ptrdiff_t produced, produced_char, consumed, consumed_char;
 495
 496   ptrdiff_t src_pos, src_pos_byte, src_chars, src_bytes;
 497   Lisp_Object src_object;
 498   const unsigned char *source;
 499
 500   ptrdiff_t dst_pos, dst_pos_byte, dst_bytes;
 501   Lisp_Object dst_object;
 502   unsigned char *destination;
 503
 504   /* If an element is non-negative, it is a character code.
 505
 506      If it is in the range -128..-1, it is a 8-bit character code
 507      minus 256.
 508
 509      If it is less than -128, it specifies the start of an annotation
 510      chunk.  The length of the chunk is -128 minus the value of the
 511      element.  The following elements are OFFSET, ANNOTATION-TYPE, and
 512      a sequence of actual data for the annotation.  OFFSET is a
 513      character position offset from dst_pos or src_pos,
 514      ANNOTATION-TYPE specifies the meaning of the annotation and how to
 515      handle the following data..  */
 516   int *charbuf;
 517   int charbuf_size, charbuf_used;
 518
 519   unsigned char carryover[64];
 520   int carryover_bytes;
 521
 522   int default_char;
 523
 524   bool (*detector) (struct coding_system *, struct coding_detection_info *);
 525   void (*decoder) (struct coding_system *);
 526   bool (*encoder) (struct coding_system *);
 527 };
 528
 529 /* Meanings of bits in the member `common_flags' of the structure
 530    coding_system.  The lowest 8 bits are reserved for various kind of
 531    annotations (currently two of them are used).  */
 532 #define CODING_ANNOTATION_MASK                  0x00FF
 533 #define CODING_ANNOTATE_COMPOSITION_MASK        0x0001
 534 #define CODING_ANNOTATE_DIRECTION_MASK          0x0002
 535 #define CODING_ANNOTATE_CHARSET_MASK            0x0003
 536 #define CODING_FOR_UNIBYTE_MASK                 0x0100
 537 #define CODING_REQUIRE_FLUSHING_MASK            0x0200
 538 #define CODING_REQUIRE_DECODING_MASK            0x0400
 539 #define CODING_REQUIRE_ENCODING_MASK            0x0800
 540 #define CODING_REQUIRE_DETECTION_MASK           0x1000
 541 #define CODING_RESET_AT_BOL_MASK                0x2000
 542
 543 /* Return nonzero if the coding context CODING requires annotation
 544    handling.  */
 545 #define CODING_REQUIRE_ANNOTATION(coding) \
 546   ((coding)->common_flags & CODING_ANNOTATION_MASK)
 547
 548 /* Return nonzero if the coding context CODING prefers decoding into
 549    unibyte.  */
 550 #define CODING_FOR_UNIBYTE(coding) \
 551   ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)
 552
 553 /* Return nonzero if the coding context CODING requires specific code to be
 554    attached at the tail of converted text.  */
 555 #define CODING_REQUIRE_FLUSHING(coding) \
 556   ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
 557
 558 /* Return nonzero if the coding context CODING requires code conversion on
 559    decoding.  */
 560 #define CODING_REQUIRE_DECODING(coding) \
 561   ((coding)->dst_multibyte              \
 562    || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
 563
 564
 565 /* Return nonzero if the coding context CODING requires code conversion on
 566    encoding.
 567    The non-multibyte part of the condition is to support encoding of
 568    unibyte strings/buffers generated by string-as-unibyte or
 569    (set-buffer-multibyte nil) from multibyte strings/buffers.  */
 570 #define CODING_REQUIRE_ENCODING(coding)                         \
 571   ((coding)->src_multibyte                                      \
 572    || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK     \
 573    || (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY)
 574
 575
 576 /* Return nonzero if the coding context CODING requires some kind of code
 577    detection.  */
 578 #define CODING_REQUIRE_DETECTION(coding) \
 579   ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
 580
 581 /* Return nonzero if the coding context CODING requires code conversion on
 582    decoding or some kind of code detection.  */
 583 #define CODING_MAY_REQUIRE_DECODING(coding)     \
 584   (CODING_REQUIRE_DECODING (coding)             \
 585    || CODING_REQUIRE_DETECTION (coding))
 586
 587 /* Macros to decode or encode a character of JISX0208 in SJIS.  S1 and
 588    S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
 589    system.  C1 and C2 are the 1st and 2nd position codes of Emacs'
 590    internal format.  */
 591
 592 #define SJIS_TO_JIS(code)                               \
 593   do {                                                  \
 594     int s1, s2, j1, j2;                                 \
 595                                                         \
 596     s1 = (code) >> 8, s2 = (code) & 0xFF;               \
 597                                                         \
 598     if (s2 >= 0x9F)                                     \
 599       (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0),       \
 600        j2 = s2 - 0x7E);                                 \
 601     else                                                \
 602       (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1),     \
 603        j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F));         \
 604     (code) = (j1 << 8) | j2;                            \
 605   } while (false)
 606
 607 #define SJIS_TO_JIS2(code)                              \
 608   do {                                                  \
 609     int s1, s2, j1, j2;                                 \
 610                                                         \
 611     s1 = (code) >> 8, s2 = (code) & 0xFF;               \
 612                                                         \
 613     if (s2 >= 0x9F)                                     \
 614       {                                                 \
 615         j1 = (s1 == 0xF0 ? 0x28                         \
 616               : s1 == 0xF1 ? 0x24                       \
 617               : s1 == 0xF2 ? 0x2C                       \
 618               : s1 == 0xF3 ? 0x2E                       \
 619               : 0x6E + (s1 - 0xF4) * 2);                \
 620         j2 = s2 - 0x7E;                                 \
 621       }                                                 \
 622     else                                                \
 623       {                                                 \
 624         j1 = (s1 <= 0xF2 ? 0x21 + (s1 - 0xF0) * 2       \
 625               : s1 <= 0xF4 ? 0x2D + (s1 - 0xF3) * 2     \
 626               : 0x6F + (s1 - 0xF5) * 2);                \
 627         j2 = s2 - ((s2 >= 0x7F ? 0x20 : 0x1F));         \
 628       }                                                 \
 629     (code) = (j1 << 8) | j2;                            \
 630   } while (false)
 631
 632
 633 #define JIS_TO_SJIS(code)                               \
 634   do {                                                  \
 635     int s1, s2, j1, j2;                                 \
 636                                                         \
 637     j1 = (code) >> 8, j2 = (code) & 0xFF;               \
 638     if (j1 & 1)                                         \
 639       (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1),       \
 640        s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F));         \
 641     else                                                \
 642       (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0),       \
 643        s2 = j2 + 0x7E);                                 \
 644     (code) = (s1 << 8) | s2;                            \
 645   } while (false)
 646
 647 #define JIS_TO_SJIS2(code)                              \
 648   do {                                                  \
 649     int s1, s2, j1, j2;                                 \
 650                                                         \
 651     j1 = (code) >> 8, j2 = (code) & 0xFF;               \
 652     if (j1 & 1)                                         \
 653       {                                                 \
 654         s1 = (j1 <= 0x25 ? 0xF0 + (j1 - 0x21) / 2       \
 655               : j1 <= 0x2F ? 0xF3 + (j1 - 0x2D) / 2     \
 656               : 0xF5 + (j1 - 0x6F) / 2);                \
 657         s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F);         \
 658       }                                                 \
 659     else                                                \
 660       {                                                 \
 661         s1 = (j1 == 0x28 ? 0xF0                         \
 662               : j1 == 0x24 ? 0xF1                       \
 663               : j1 == 0x2C ? 0xF2                       \
 664               : j1 == 0x2E ? 0xF3                       \
 665               : 0xF4 + (j1 - 0x6E) / 2);                \
 666         s2 = j2 + 0x7E;                                 \
 667       }                                                 \
 668     (code) = (s1 << 8) | s2;                            \
 669   } while (false)
 670
 671 /* Encode the file name NAME using the specified coding system
 672    for file names, if any.  */
 673 #define ENCODE_FILE(NAME)  encode_file_name (NAME)
 674
 675 /* Decode the file name NAME using the specified coding system
 676    for file names, if any.  */
 677 #define DECODE_FILE(NAME)  decode_file_name (NAME)
 678
 679 /* Encode the string STR using the specified coding system
 680    for system functions, if any.  */
 681 #define ENCODE_SYSTEM(str)                                                 \
 682   (! NILP (Vlocale_coding_system)                                          \
 683    ? code_convert_string_norecord (str, Vlocale_coding_system, true)       \
 684    : str)
 685
 686 /* Decode the string STR using the specified coding system
 687    for system functions, if any.  */
 688 #define DECODE_SYSTEM(str)                                                 \
 689   (! NILP (Vlocale_coding_system)                                          \
 690    ? code_convert_string_norecord (str, Vlocale_coding_system, false)      \
 691    : str)
 692
 693 /* Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op.  */
 694 #define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, true)
 695
 696 /* Extern declarations.  */
 697 extern Lisp_Object code_conversion_save (bool, bool);
 698 extern void setup_coding_system (Lisp_Object, struct coding_system *);
 699 extern Lisp_Object coding_charset_list (struct coding_system *);
 700 extern Lisp_Object coding_system_charset_list (Lisp_Object);
 701 extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
 702                                         Lisp_Object, bool, bool, bool);
 703 extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
 704                                                  bool);
 705 extern Lisp_Object encode_file_name (Lisp_Object);
 706 extern Lisp_Object decode_file_name (Lisp_Object);
 707 extern Lisp_Object raw_text_coding_system (Lisp_Object);
 708 extern bool raw_text_coding_system_p (struct coding_system *);
 709 extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
 710 extern Lisp_Object complement_process_encoding_system (Lisp_Object);
 711
 712 extern void decode_coding_gap (struct coding_system *,
 713                                ptrdiff_t, ptrdiff_t);
 714 extern void decode_coding_object (struct coding_system *,
 715                                   Lisp_Object, ptrdiff_t, ptrdiff_t,
 716                                   ptrdiff_t, ptrdiff_t, Lisp_Object);
 717 extern void encode_coding_object (struct coding_system *,
 718                                   Lisp_Object, ptrdiff_t, ptrdiff_t,
 719                                   ptrdiff_t, ptrdiff_t, Lisp_Object);
 720
 721 #if defined (WINDOWSNT) || defined (CYGWIN)
 722
 723 /* These functions use Lisp string objects to store the UTF-16LE
 724    strings that modern versions of Windows expect.  These strings are
 725    not particularly useful to Lisp, and all Lisp strings should be
 726    native Emacs multibyte.  */
 727
 728 /* Access the wide-character string stored in a Lisp string object.  */
 729 #define WCSDATA(x) ((wchar_t *) SDATA (x))
 730
 731 /* Convert the multi-byte string in STR to UTF-16LE encoded unibyte
 732    string, and store it in *BUF.  BUF may safely point to STR on entry.  */
 733 extern wchar_t *to_unicode (Lisp_Object str, Lisp_Object *buf);
 734
 735 /* Convert STR, a UTF-16LE encoded string embedded in a unibyte string
 736    object, to a multi-byte Emacs string and return it.  This function
 737    calls code_convert_string_norecord internally and has all its
 738    failure modes.  STR itself is not modified.  */
 739 extern Lisp_Object from_unicode (Lisp_Object str);
 740
 741 /* Convert WSTR to an Emacs string.  */
 742 extern Lisp_Object from_unicode_buffer (const wchar_t *wstr);
 743
 744 #endif /* WINDOWSNT || CYGWIN */
 745
 746 /* Macros for backward compatibility.  */
 747
 748 #define encode_coding_string(coding, string, nocopy)                    \
 749   (STRING_MULTIBYTE(string) ?                                           \
 750     (encode_coding_object (coding, string, 0, 0, SCHARS (string),       \
 751                            SBYTES (string), Qt),                        \
 752      (coding)->dst_object) : (string))
 753
 754
 755 #define decode_coding_c_string(coding, src, bytes, dst_object)          \
 756   do {                                                                  \
 757     (coding)->source = (src);                                           \
 758     (coding)->src_chars = (coding)->src_bytes = (bytes);                \
 759     decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes),       \
 760                           (dst_object));                                \
 761   } while (false)
 762
 763
 764 extern Lisp_Object preferred_coding_system (void);
 765
 766
 767 #ifdef emacs
 768
 769 extern char *emacs_strerror (int);
 770
 771 /* Coding system to be used to encode text for terminal display when
 772    terminal coding system is nil.  */
 773 extern struct coding_system safe_terminal_coding;
 774
 775 #endif
 776
 777 extern char emacs_mule_bytes[256];
 778
 779 #endif /* EMACS_CODING_H */