src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001-2012 Free Software Foundation, Inc.
   3    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4      2005, 2006, 2007, 2008, 2009, 2010, 2011
   5      National Institute of Advanced Industrial Science and Technology (AIST)
   6      Registration Number H14PRO021
   7    Copyright (C) 2003
   8      National Institute of Advanced Industrial Science and Technology (AIST)
   9      Registration Number H13PRO009
  10
  11 This file is part of GNU Emacs.
  12
  13 GNU Emacs is free software: you can redistribute it and/or modify
  14 it under the terms of the GNU General Public License as published by
  15 the Free Software Foundation, either version 3 of the License, or
  16 (at your option) any later version.
  17
  18 GNU Emacs is distributed in the hope that it will be useful,
  19 but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 GNU General Public License for more details.
  22
  23 You should have received a copy of the GNU General Public License
  24 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  On
  59   the C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcoding_system_hash_table.  The conversion
  61   from coding system symbol to attributes vector is done by looking up
  62   Vcoding_system_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on the operating system.
 123   For instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is a two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (struct coding_system *coding,
 157                    struct coding_detection_info *detect_info)
 158 {
 159   const unsigned char *src = coding->source;
 160   const unsigned char *src_end = coding->source + coding->src_bytes;
 161   int multibytep = coding->src_multibyte;
 162   EMACS_INT consumed_chars = 0;
 163   int found = 0;
 164   ...;  /* Other locals, e.g. C and SRC_BASE used by ONE_MORE_BYTE.  */
 165
 166   while (1)
 167     {
 168       /* Get one byte from the source.  If the source is exhausted, jump
 169          to no_more_source:.  */
 170       ONE_MORE_BYTE (c);
 171       /* Reject on a nonconforming byte; note positive evidence in FOUND.  */
 172       if (! __C_conforms_to_XXX___ (c))
 173         break;
 174       if (__C_strongly_suggests_XXX__ (c))
 175         found = CATEGORY_MASK_XXX;
 176     }
 177   /* The byte sequence is invalid for XXX.  */
 178   detect_info->rejected |= CATEGORY_MASK_XXX;
 179   return 0;
 180
 181  no_more_source:
 182   /* The source exhausted successfully.  */
 183   detect_info->found |= found;
 184   return 1;
 185 }
 186 #endif
 187
 188 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 189
 190   These functions decode a byte sequence specified as a source by
 191   CODING.  The resulting multibyte text goes to a place pointed to by
 192   CODING->charbuf, the length of which should not exceed
 193   CODING->charbuf_size;
 194
 195   These functions set the information of original and decoded texts in
 196   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 197   They also set CODING->result to one of CODING_RESULT_XXX indicating
 198   how the decoding is finished.
 199
 200   Below is the template of these functions.  */
 201
 202 #if 0
 203 static void
 204 decode_coding_XXXX (struct coding_system *coding)
 205 {
 206   const unsigned char *src = coding->source + coding->consumed;
 207   const unsigned char *src_end = coding->source + coding->src_bytes;
 208   /* SRC_BASE remembers the start position in source in each loop.
 209      The loop will be exited when there's not enough source code, or
 210      when there's no room in CHARBUF for a decoded character.  */
 211   const unsigned char *src_base;
 212   /* A buffer to produce decoded characters.  */
 213   int *charbuf = coding->charbuf + coding->charbuf_used;
 214   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 215   int multibytep = coding->src_multibyte;
 216   /* C and CONSUMED_CHARS, which ONE_MORE_BYTE uses, are omitted here.  */
 217   while (1)
 218     {
 219       src_base = src;
 220       if (charbuf >= charbuf_end)
 221         /* No more room to produce a decoded character.  */
 222         break;
 223       ONE_MORE_BYTE (c);
 224       /* Decode it. */
 225     }
 226
 227  no_more_source:
 228   if (src_base < src_end
 229       && coding->mode & CODING_MODE_LAST_BLOCK)
 230     /* If the source ends by partial bytes to construct a character,
 231        treat them as eight-bit raw data.  */
 232     while (src_base < src_end && charbuf < charbuf_end)
 233       *charbuf++ = *src_base++;
 234   /* Remember how many bytes and characters we consumed.  If the
 235      source is multibyte, the bytes and chars are not identical.  */
 236   coding->consumed = coding->consumed_char = src_base - coding->source;
 237   /* Remember how many characters we produced.  */
 238   coding->charbuf_used = charbuf - coding->charbuf;
 239 }
 240 #endif
 241
 242 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 243
 244   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 245   internal multibyte format by CODING.  The resulting byte sequence
 246   goes to a place pointed to by DESTINATION, the length of which
 247   should not exceed DST_BYTES.
 248
 249   These functions set the information of original and encoded texts in
 250   the members produced, produced_char, consumed, and consumed_char of
 251   the structure *CODING.  They also set the member result to one of
 252   CODING_RESULT_XXX indicating how the encoding finished.
 253
 254   DST_BYTES zero means that the source area and destination area
 255   overlap, which means that we can produce encoded text only until it
 256   reaches the head of the not-yet-encoded source text.
 257
 258   Below is a template of these functions.  */
 259 #if 0
 260 static void
 261 encode_coding_XXX (struct coding_system *coding)
 262 {
 263   int multibytep = coding->dst_multibyte;
 264   int *charbuf = coding->charbuf;
 265   int *charbuf_end = coding->charbuf + coding->charbuf_used;
 266   unsigned char *dst = coding->destination + coding->produced;
 267   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 268   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 269   EMACS_INT produced_chars = 0;
 270   /* Stop while one more iteration still cannot write past DST_END.  */
 271   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 272     {
 273       int c = *charbuf;
 274       /* Encode C into DST, and increment DST.  */
 275     }
 276  label_no_more_destination:
 277   /* How many chars and bytes we produced.  */
 278   coding->produced_char += produced_chars;
 279   coding->produced = dst - coding->destination;
 280 }
 281 #endif
 282
 283 \f
 284 /*** 1. Preamble ***/
 285
 286 #include <config.h>
 287 #include <stdio.h>
 288 #include <setjmp.h>
 289
 290 #include "lisp.h"
 291 #include "buffer.h"
 292 #include "character.h"
 293 #include "charset.h"
 294 #include "ccl.h"
 295 #include "composite.h"
 296 #include "coding.h"
 297 #include "window.h"
 298 #include "frame.h"
 299 #include "termhooks.h"
 300 /* NOTE(review): presumably maps coding system symbols to attributes.  */
 301 Lisp_Object Vcoding_system_hash_table;
 302 /* NOTE(review): Q-prefixed objects below are presumably Lisp symbols.  */
 303 static Lisp_Object Qcoding_system, Qeol_type;
 304 static Lisp_Object Qcoding_aliases;
 305 Lisp_Object Qunix, Qdos;
 306 Lisp_Object Qbuffer_file_coding_system;
 307 static Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 static Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qutf_8;
 311 static Lisp_Object Qiso_2022;
 312 static Lisp_Object Qutf_16, Qshift_jis, Qbig5;
 313 static Lisp_Object Qbig, Qlittle;
 314 static Lisp_Object Qcoding_system_history;
 315 static Lisp_Object Qvalid_codes;
 316 static Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 317 static Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 318 static Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 319 static Lisp_Object QCascii_compatible_p;
 320
 321 Lisp_Object Qcall_process, Qcall_process_region;
 322 Lisp_Object Qstart_process, Qopen_network_stream;
 323 static Lisp_Object Qtarget_idx;
 324 /* NOTE(review): these look like names for the CODING_RESULT_XXX codes.  */
 325 static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 326 static Lisp_Object Qinterrupted, Qinsufficient_memory;
 327
 328 /* If a symbol has this property, evaluate the value to define the
 329    symbol as a coding system.  */
 330 static Lisp_Object Qcoding_system_define_form;
 331
 332 /* Format of end-of-line decided by the system.  This is Qunix on
 333    Unix and Mac, Qdos on DOS/Windows.
 334    This has an effect only for external encoding (i.e. for output to
 335    file and process), not for in-buffer or Lisp string encoding.  */
 336 static Lisp_Object system_eol_type;
 337
 338 #ifdef emacs
 339 /* The definitions up to the matching #endif need `emacs' defined.  */
 340 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 341
 342 /* Coding system emacs-mule and raw-text are for converting only
 343    end-of-line format.  */
 344 Lisp_Object Qemacs_mule, Qraw_text;
 345 Lisp_Object Qutf_8_emacs;
 346
 347 /* NOTE(review): this comment used to introduce "three variables" that
 348    hand coding systems between Lisp and C; only one is defined below.  */
 349 /* Coding system to be used to encode text for terminal display when
 350    the terminal coding system is nil.  */
 351 struct coding_system safe_terminal_coding;
 352
 353 #endif /* emacs */
 354
 355 Lisp_Object Qtranslation_table;
 356 Lisp_Object Qtranslation_table_id;
 357 static Lisp_Object Qtranslation_table_for_decode;
 358 static Lisp_Object Qtranslation_table_for_encode;
 359
 360 /* Two special coding systems (sjis and big5, judging by the names).  */
 361 static Lisp_Object Vsjis_coding_system;
 362 static Lisp_Object Vbig5_coding_system;
 363
 364 /* ISO2022 section */
 365 /* Integer at index REG of CODING's coding_attr_iso_initial attribute.  */
 366 #define CODING_ISO_INITIAL(coding, reg)                 \
 367   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 368                      coding_attr_iso_initial),          \
 369                reg)))
 370 /* CODING->safe_charsets[CHARSET_ID], or -1 if CHARSET_ID is above
 371    CODING->max_charset_id or the entry is 255.  No lower-bound check.  */
 372 #define CODING_ISO_REQUEST(coding, charset_id)          \
 373   (((charset_id) <= (coding)->max_charset_id            \
 374     ? ((coding)->safe_charsets[charset_id] != 255       \
 375        ? (coding)->safe_charsets[charset_id]            \
 376        : -1)                                            \
 377     : -1))
 378
 379 /* Accessors for the members of CODING->spec.iso_2022.  */
 380 #define CODING_ISO_FLAGS(coding)        \
 381   ((coding)->spec.iso_2022.flags)
 382 #define CODING_ISO_DESIGNATION(coding, reg)     \
 383   ((coding)->spec.iso_2022.current_designation[reg])
 384 #define CODING_ISO_INVOCATION(coding, plane)    \
 385   ((coding)->spec.iso_2022.current_invocation[plane])
 386 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 387   ((coding)->spec.iso_2022.single_shifting)
 388 #define CODING_ISO_BOL(coding)  \
 389   ((coding)->spec.iso_2022.bol)
 390 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 391   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 392 #define CODING_ISO_CMP_STATUS(coding)   \
 393   (&(coding)->spec.iso_2022.cmp_status)
 394 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 395   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 396 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 397   ((coding)->spec.iso_2022.embedded_utf_8)
 398
 399 /* Control characters of ISO2022.  */
 400                         /* code */      /* function */
 401 #define ISO_CODE_SO     0x0E            /* shift-out */
 402 #define ISO_CODE_SI     0x0F            /* shift-in */
 403 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 404 #define ISO_CODE_ESC    0x1B            /* escape */
 405 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 406 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 407 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 408
 409 /* Every (1-byte) code of ISO2022 is classified into one of the
 410    following classes.  */
 411 enum iso_code_class_type
 412   {
 413     ISO_control_0,              /* Control codes in the range
 414                                    0x00..0x1F and 0x7F, except for the
 415                                    following 4 codes.  */
 416     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 417     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 418     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 419     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 420     ISO_control_1,              /* Control codes in the range
 421                                    0x80..0x9F, except for the
 422                                    following 3 codes.  */
 423     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 424     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 425     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 426     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 427     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 428     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 429     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 430   };
 431
 432 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
 433     `iso-flags' attribute of an iso2022 coding system.  */
 434
 435 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 436    instead of the correct short-form sequence (e.g. ESC $ A).  */
 437 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 438
 439 /* If set, reset graphic planes and registers at end-of-line to the
 440    initial state.  */
 441 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 442
 443 /* If set, reset graphic planes and registers before any control
 444    characters to the initial state.  */
 445 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 446
 447 /* If set, encode by 7-bit environment.  */
 448 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 449
 450 /* If set, use locking-shift function.  */
 451 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 452
 453 /* If set, use single-shift function.  Overwrite
 454    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 455 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 456
 457 /* If set, use designation escape sequence.  */
 458 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 459
 460 /* If set, produce revision number sequence.  */
 461 #define CODING_ISO_FLAG_REVISION        0x0080
 462
 463 /* If set, produce ISO6429's direction specifying sequence.  */
 464 #define CODING_ISO_FLAG_DIRECTION       0x0100
 465
 466 /* If set, assume designation states are reset at beginning of line on
 467    output.  */
 468 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 469
 470 /* If set, designation sequence should be placed at beginning of line
 471    on output.  */
 472 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 473
 474 /* If set, do not encode unsafe characters on output.  */
 475 #define CODING_ISO_FLAG_SAFE            0x0800
 476
 477 /* If set, extra latin codes (128..159) are accepted as valid codes
 478    on input.  */
 479 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 480 /* NOTE(review): the flags from here on carry no description.  */
 481 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 482
 483 /* #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 */
 484
 485 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 486
 487 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 488
 489 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 490
 491 /* A character to be produced on output if encoding of the original
 492    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 493 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 494
 495 /* UTF-8 section: accessor for the spec.utf_8_bom member of CODING.  */
 496 #define CODING_UTF_8_BOM(coding)        \
 497   ((coding)->spec.utf_8_bom)
 498
 499 /* UTF-16 section: accessors for the members of CODING->spec.utf_16.  */
 500 #define CODING_UTF_16_BOM(coding)       \
 501   ((coding)->spec.utf_16.bom)
 502
 503 #define CODING_UTF_16_ENDIAN(coding)    \
 504   ((coding)->spec.utf_16.endian)
 505
 506 #define CODING_UTF_16_SURROGATE(coding) \
 507   ((coding)->spec.utf_16.surrogate)
 508
 509 /* Note: the first two CCL macros expand to an unparenthesized AREF.  */
 510 /* CCL section: fetch the CCL attributes of CODING by its id.  */
 511 #define CODING_CCL_DECODER(coding)      \
 512   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 513 #define CODING_CCL_ENCODER(coding)      \
 514   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 515 #define CODING_CCL_VALIDS(coding)                                          \
 516   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 517
 518 /* Index for each coding category in `coding_categories'.  */
 519 /* The values also serve as bit positions for CATEGORY_MASK_XXX.  */
 520 enum coding_category
 521   {
 522     coding_category_iso_7,
 523     coding_category_iso_7_tight,
 524     coding_category_iso_8_1,
 525     coding_category_iso_8_2,
 526     coding_category_iso_7_else,
 527     coding_category_iso_8_else,
 528     coding_category_utf_8_auto,
 529     coding_category_utf_8_nosig,
 530     coding_category_utf_8_sig,
 531     coding_category_utf_16_auto,
 532     coding_category_utf_16_be,
 533     coding_category_utf_16_le,
 534     coding_category_utf_16_be_nosig,
 535     coding_category_utf_16_le_nosig,
 536     coding_category_charset,
 537     coding_category_sjis,
 538     coding_category_big5,
 539     coding_category_ccl,
 540     coding_category_emacs_mule,
 541     /* All above are targets of code detection.  */
 542     coding_category_raw_text,
 543     coding_category_undecided,
 544     coding_category_max         /* Number of categories; sizes the tables.  */
 545   };
 546
 547 /* Flag bits used in detect_coding_XXXX; bit N is coding category N.  */
 548 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 549 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 550 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 551 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 552 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 553 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 554 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 555 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 556 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 557 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 558 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 559 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 560 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 561 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 562 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 563 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 564 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 565 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 566 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 567 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 568
 569 /* This value is returned if detect_coding_mask () finds nothing other
 570    than ASCII characters.  It omits CATEGORY_MASK_RAW_TEXT.  */
 571 #define CATEGORY_MASK_ANY               \
 572   (CATEGORY_MASK_ISO_7                  \
 573    | CATEGORY_MASK_ISO_7_TIGHT          \
 574    | CATEGORY_MASK_ISO_8_1              \
 575    | CATEGORY_MASK_ISO_8_2              \
 576    | CATEGORY_MASK_ISO_7_ELSE           \
 577    | CATEGORY_MASK_ISO_8_ELSE           \
 578    | CATEGORY_MASK_UTF_8_AUTO           \
 579    | CATEGORY_MASK_UTF_8_NOSIG          \
 580    | CATEGORY_MASK_UTF_8_SIG            \
 581    | CATEGORY_MASK_UTF_16_AUTO          \
 582    | CATEGORY_MASK_UTF_16_BE            \
 583    | CATEGORY_MASK_UTF_16_LE            \
 584    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 585    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 586    | CATEGORY_MASK_CHARSET              \
 587    | CATEGORY_MASK_SJIS                 \
 588    | CATEGORY_MASK_BIG5                 \
 589    | CATEGORY_MASK_CCL                  \
 590    | CATEGORY_MASK_EMACS_MULE)
 591
 592 /* Composite masks grouping related category bits.  */
 593 #define CATEGORY_MASK_ISO_7BIT \
 594   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 595
 596 #define CATEGORY_MASK_ISO_8BIT \
 597   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 598
 599 #define CATEGORY_MASK_ISO_ELSE \
 600   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 601 /* Every ISO category except ISO_8_1 and ISO_8_2.  */
 602 #define CATEGORY_MASK_ISO_ESCAPE        \
 603   (CATEGORY_MASK_ISO_7                  \
 604    | CATEGORY_MASK_ISO_7_TIGHT          \
 605    | CATEGORY_MASK_ISO_7_ELSE           \
 606    | CATEGORY_MASK_ISO_8_ELSE)
 607 /* All six ISO categories.  */
 608 #define CATEGORY_MASK_ISO       \
 609   (  CATEGORY_MASK_ISO_7BIT     \
 610      | CATEGORY_MASK_ISO_8BIT   \
 611      | CATEGORY_MASK_ISO_ELSE)
 612 /* All five UTF-16 categories.  */
 613 #define CATEGORY_MASK_UTF_16            \
 614   (CATEGORY_MASK_UTF_16_AUTO            \
 615    | CATEGORY_MASK_UTF_16_BE            \
 616    | CATEGORY_MASK_UTF_16_LE            \
 617    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 618    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 619 /* All three UTF-8 categories.  */
 620 #define CATEGORY_MASK_UTF_8     \
 621   (CATEGORY_MASK_UTF_8_AUTO     \
 622    | CATEGORY_MASK_UTF_8_NOSIG  \
 623    | CATEGORY_MASK_UTF_8_SIG)
 624
 625 /* Table of coding categories (Lisp symbols).  This variable is for
 626    internal use only.  */
 627 static Lisp_Object Vcoding_category_table;
 628
 629 /* Coding categories ordered by priority; one slot per category.  */
 630 static enum coding_category coding_priorities[coding_category_max];
 631
 632 /* Indexed by enum coding_category: the Nth element is a coding context
 633    for the coding system bound to the Nth coding category.  */
 634 static struct coding_system coding_categories[coding_category_max];
 635
 636 /*** Commonly used macros and functions ***/
 637 /* min/max evaluate one argument twice; avoid side effects in A or B.  */
 638 #ifndef min
 639 #define min(a, b) ((a) < (b) ? (a) : (b))
 640 #endif
 641 #ifndef max
 642 #define max(a, b) ((a) > (b) ? (a) : (b))
 643 #endif
 644 /* Set ATTRS from CODING's id, then CHARSET_LIST from ATTRS.  */
 645 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 646   do {                                                  \
 647     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 648     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 649   } while (0)
 650
 651
 652 /* Safely get one byte from the source text pointed by SRC which ends
 653    at SRC_END, set C to that byte, and increment CONSUMED_CHARS.  If
 654    the source is exhausted, jump to `no_more_source', first recording
 655    CODING_RESULT_INSUFFICIENT_SRC when SRC_BASE < SRC.  If multibytep
 656    is nonzero and C has its high bit set: a lead byte 0xC0 or 0xC1 is
 657    combined with the next byte into one value; any other character
 658    sets C to its negated code and records CODING_RESULT_INVALID_SRC.  */
 659 /* Caller supplies: coding, src, src_base, src_end, multibytep, consumed_chars.  */
 660 #define ONE_MORE_BYTE(c)                                \
 661   do {                                                  \
 662     if (src == src_end)                                 \
 663       {                                                 \
 664         if (src_base < src)                             \
 665           record_conversion_result                      \
 666             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 667         goto no_more_source;                            \
 668       }                                                 \
 669     c = *src++;                                         \
 670     if (multibytep && (c & 0x80))                       \
 671       {                                                 \
 672         if ((c & 0xFE) == 0xC0)                         \
 673           c = ((c & 1) << 6) | *src++;                  \
 674         else                                            \
 675           {                                             \
 676             src--;                                      \
 677             c = - string_char (src, &src, NULL);        \
 678             record_conversion_result                    \
 679               (coding, CODING_RESULT_INVALID_SRC);      \
 680           }                                             \
 681       }                                                 \
 682     consumed_chars++;                                   \
 683   } while (0)
 684
 685 /* Safely get two bytes from the source text pointed by SRC which ends
 686    at SRC_END, and set C1 and C2 to those bytes while skipping the
 687    heading multibyte characters.  If there are not enough bytes in the
 688    source, it jumps to `no_more_source'.  If multibytep is nonzero and
 689    a multibyte character is found for C2, set C2 to -1 (SRC is then
 690    left just past its first byte).  The caller should declare and set
 691    these variables appropriately in advance:
 692         src, src_end, multibytep
 693    It is intended that this macro is used in detect_coding_utf_16.  */
 694 /* C1 must be able to hold -1: the inner loop repeats while C1 < 0.  */
 695 #define TWO_MORE_BYTES(c1, c2)                          \
 696   do {                                                  \
 697     do {                                                \
 698       if (src == src_end)                               \
 699         goto no_more_source;                            \
 700       c1 = *src++;                                      \
 701       if (multibytep && (c1 & 0x80))                    \
 702         {                                               \
 703           if ((c1 & 0xFE) == 0xC0)                      \
 704             c1 = ((c1 & 1) << 6) | *src++;              \
 705           else                                          \
 706             {                                           \
 707               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 708               c1 = -1;                                  \
 709             }                                           \
 710         }                                               \
 711     } while (c1 < 0);                                   \
 712     if (src == src_end)                                 \
 713       goto no_more_source;                              \
 714     c2 = *src++;                                        \
 715     if (multibytep && (c2 & 0x80))                      \
 716       {                                                 \
 717         if ((c2 & 0xFE) == 0xC0)                        \
 718           c2 = ((c2 & 1) << 6) | *src++;                \
 719         else                                            \
 720           c2 = -1;                                      \
 721       }                                                 \
 722   } while (0)
 723
 724
 725 /* Store a byte C in the place pointed by DST and increment DST to the
 726    next free point, and increment PRODUCED_CHARS.  The caller should
 727    ensure that C is 0..127, and declare and set the variables `dst'
 728    and `produced_chars' appropriately in advance.
 729 */
 730
 731 /* No bounds check is done here; DST must have room for the byte.  */
 732 #define EMIT_ONE_ASCII_BYTE(c)  \
 733   do {                          \
 734     produced_chars++;           \
 735     *dst++ = (c);               \
 736   } while (0)
 737
 738
 739 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2.  */
 740
 741 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 742   do {                                  \
 743     produced_chars += 2;                \
 744     *dst++ = (c1), *dst++ = (c2);       \
 745   } while (0)
 746
 747
 748 /* Store a byte C in the place pointed by DST and increment DST to the
 749    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 750    nonzero, store it in the appropriate multibyte form (a byte >= 0x80
 751    is first mapped with BYTE8_TO_CHAR).  The caller should declare and
 752    set the variables `dst', `multibytep' and `produced_chars' in advance.  */
 753
 754 #define EMIT_ONE_BYTE(c)                \
 755   do {                                  \
 756     produced_chars++;                   \
 757     if (multibytep)                     \
 758       {                                 \
 759         unsigned ch = (c);              \
 760         if (ch >= 0x80)                 \
 761           ch = BYTE8_TO_CHAR (ch);      \
 762         CHAR_STRING_ADVANCE (ch, dst);  \
 763       }                                 \
 764     else                                \
 765       *dst++ = (c);                     \
 766   } while (0)
 767
 768
 769 /* Like EMIT_ONE_BYTE, but emit two bytes, C1 first and then C2; PRODUCED_CHARS grows by two.  */
 770
 771 #define EMIT_TWO_BYTES(c1, c2)          \
 772   do {                                  \
 773     produced_chars += 2;                \
 774     if (multibytep)                     \
 775       {                                 \
 776         unsigned ch;                    \
 777                                         \
 778         ch = (c1);                      \
 779         if (ch >= 0x80)                 \
 780           ch = BYTE8_TO_CHAR (ch);      \
 781         CHAR_STRING_ADVANCE (ch, dst);  \
 782         ch = (c2);                      \
 783         if (ch >= 0x80)                 \
 784           ch = BYTE8_TO_CHAR (ch);      \
 785         CHAR_STRING_ADVANCE (ch, dst);  \
 786       }                                 \
 787     else                                \
 788       {                                 \
 789         *dst++ = (c1);                  \
 790         *dst++ = (c2);                  \
 791       }                                 \
 792   } while (0)
 793
 794 /* Emit the three bytes C1, C2 and C3 in that order, using EMIT_ONE_BYTE for C1 and EMIT_TWO_BYTES for C2 and C3.  */
 795 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 796   do {                                  \
 797     EMIT_ONE_BYTE (c1);                 \
 798     EMIT_TWO_BYTES (c2, c3);            \
 799   } while (0)
 800
 801 /* Emit the four bytes C1, C2, C3 and C4 in that order, as two EMIT_TWO_BYTES pairs.  */
 802 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 803   do {                                          \
 804     EMIT_TWO_BYTES (c1, c2);                    \
 805     EMIT_TWO_BYTES (c3, c4);                    \
 806   } while (0)
 807
 808
 809 /* Prototypes for the static (file-local) functions of this file.  */
 810 static void record_conversion_result (struct coding_system *coding,
 811                                       enum coding_result_code result);
 812 static int detect_coding_utf_8 (struct coding_system *,
 813                                 struct coding_detection_info *info);
 814 static void decode_coding_utf_8 (struct coding_system *);
 815 static int encode_coding_utf_8 (struct coding_system *);
 816
 817 static int detect_coding_utf_16 (struct coding_system *,
 818                                  struct coding_detection_info *info);
 819 static void decode_coding_utf_16 (struct coding_system *);
 820 static int encode_coding_utf_16 (struct coding_system *);
 821
 822 static int detect_coding_iso_2022 (struct coding_system *,
 823                                    struct coding_detection_info *info);
 824 static void decode_coding_iso_2022 (struct coding_system *);
 825 static int encode_coding_iso_2022 (struct coding_system *);
 826
 827 static int detect_coding_emacs_mule (struct coding_system *,
 828                                      struct coding_detection_info *info);
 829 static void decode_coding_emacs_mule (struct coding_system *);
 830 static int encode_coding_emacs_mule (struct coding_system *);
 831
 832 static int detect_coding_sjis (struct coding_system *,
 833                                struct coding_detection_info *info);
 834 static void decode_coding_sjis (struct coding_system *);
 835 static int encode_coding_sjis (struct coding_system *);
 836
 837 static int detect_coding_big5 (struct coding_system *,
 838                                struct coding_detection_info *info);
 839 static void decode_coding_big5 (struct coding_system *);
 840 static int encode_coding_big5 (struct coding_system *);
 841
 842 static int detect_coding_ccl (struct coding_system *,
 843                               struct coding_detection_info *info);
 844 static void decode_coding_ccl (struct coding_system *);
 845 static int encode_coding_ccl (struct coding_system *);
 846
 847 static void decode_coding_raw_text (struct coding_system *);
 848 static int encode_coding_raw_text (struct coding_system *);
 849
 850 static EMACS_INT coding_set_source (struct coding_system *);
 851 static EMACS_INT coding_set_destination (struct coding_system *);
 852 static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
 853 static void coding_alloc_by_making_gap (struct coding_system *,
 854                                         EMACS_INT, EMACS_INT);
 855 static unsigned char *alloc_destination (struct coding_system *,
 856                                          EMACS_INT, unsigned char *);
 857 static void setup_iso_safe_charsets (Lisp_Object);
 858 static EMACS_INT encode_designation_at_bol (struct coding_system *,
 859                                       int *, int *, unsigned char *);
 860 static int detect_eol (const unsigned char *,
 861                        EMACS_INT, enum coding_category);
 862 static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
 863 static void decode_eol (struct coding_system *);
 864 static Lisp_Object get_translation_table (Lisp_Object, int, int *);
 865 static Lisp_Object get_translation (Lisp_Object, int *, int *);
 866 static int produce_chars (struct coding_system *, Lisp_Object, int);
 867 static inline void produce_charset (struct coding_system *, int *,
 868                                     EMACS_INT);
 869 static void produce_annotation (struct coding_system *, EMACS_INT);
 870 static int decode_coding (struct coding_system *);
 871 static inline int *handle_composition_annotation (EMACS_INT, EMACS_INT,
 872                                                   struct coding_system *,
 873                                                   int *, EMACS_INT *);
 874 static inline int *handle_charset_annotation (EMACS_INT, EMACS_INT,
 875                                               struct coding_system *,
 876                                               int *, EMACS_INT *);
 877 static void consume_chars (struct coding_system *, Lisp_Object, int);
 878 static int encode_coding (struct coding_system *);
 879 static Lisp_Object make_conversion_work_buffer (int);
 880 static Lisp_Object code_conversion_restore (Lisp_Object);
 881 static inline int char_encodable_p (int, Lisp_Object);
 882 static Lisp_Object make_subsidiaries (Lisp_Object);
 883
 884 /* Remember RESULT as the outcome of the conversion done with CODING.
 885    RESULT is always stored in CODING->result.  In addition, a failure
 886    code sets Vlast_code_conversion_error to the symbol describing it;
 887    a code that is not handled explicitly is reported as the symbol
 888    named "Unknown error".  */
 889
 890 static void
 891 record_conversion_result (struct coding_system *coding,
 892                           enum coding_result_code result)
 893 {
 894   coding->result = result;
 895
 896   /* Success leaves the last-error variable alone.  So does a full
 897      destination: that condition is only temporary and is resolved
 898      when the whole conversion is finished.  */
 899   if (result == CODING_RESULT_SUCCESS
 900       || result == CODING_RESULT_INSUFFICIENT_DST)
 901     return;
 902
 903   /* Any other code is reported through the last-error variable.  */
 904   if (result == CODING_RESULT_INSUFFICIENT_SRC)
 905     Vlast_code_conversion_error = Qinsufficient_source;
 906   else if (result == CODING_RESULT_INCONSISTENT_EOL)
 907     Vlast_code_conversion_error = Qinconsistent_eol;
 908   else if (result == CODING_RESULT_INVALID_SRC)
 909     Vlast_code_conversion_error = Qinvalid_source;
 910   else if (result == CODING_RESULT_INTERRUPT)
 911     Vlast_code_conversion_error = Qinterrupted;
 912   else if (result == CODING_RESULT_INSUFFICIENT_MEM)
 913     Vlast_code_conversion_error = Qinsufficient_memory;
 914   else
 915     Vlast_code_conversion_error = intern ("Unknown error");
 916 }
 917
 918 /* These wrapper macros preserve the validity of pointers into buffer
 919    text across calls to decode_char, encode_char, etc., which may
 920    relocate buffers when they load a charset map, because loading a
 921    charset map allocates large structures.  */
 922 /* Set C to DECODE_CHAR (CHARSET, CODE).  If a charset map was loaded and coding_set_source reports a move, shift SRC, SRC_BASE and SRC_END by it.  */
 923 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 924   do {                                                                       \
 925     EMACS_INT offset;                                                        \
 926                                                                              \
 927     charset_map_loaded = 0;                                                  \
 928     c = DECODE_CHAR (charset, code);                                         \
 929     if (charset_map_loaded                                                   \
 930         && (offset = coding_set_source (coding)))                            \
 931       {                                                                      \
 932         src += offset;                                                       \
 933         src_base += offset;                                                  \
 934         src_end += offset;                                                   \
 935       }                                                                      \
 936   } while (0)
 937 /* Set CODE to ENCODE_CHAR (CHARSET, C).  If a charset map was loaded and coding_set_destination reports a move, shift DST and DST_END by it.  */
 938 #define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code)      \
 939   do {                                                                  \
 940     EMACS_INT offset;                                                   \
 941                                                                         \
 942     charset_map_loaded = 0;                                             \
 943     code = ENCODE_CHAR (charset, c);                                    \
 944     if (charset_map_loaded                                              \
 945         && (offset = coding_set_destination (coding)))                  \
 946       {                                                                 \
 947         dst += offset;                                                  \
 948         dst_end += offset;                                              \
 949       }                                                                 \
 950   } while (0)
 951 /* Set CHARSET to char_charset (C, CHARSET_LIST, CODE_RETURN).  If a charset map was loaded and coding_set_destination reports a move, shift DST and DST_END by it.  */
 952 #define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \
 953   do {                                                                  \
 954     EMACS_INT offset;                                                   \
 955                                                                         \
 956     charset_map_loaded = 0;                                             \
 957     charset = char_charset (c, charset_list, code_return);              \
 958     if (charset_map_loaded                                              \
 959         && (offset = coding_set_destination (coding)))                  \
 960       {                                                                 \
 961         dst += offset;                                                  \
 962         dst_end += offset;                                              \
 963       }                                                                 \
 964   } while (0)
 965 /* Set RESULT to CHAR_CHARSET_P (C, CHARSET).  If a charset map was loaded and coding_set_destination reports a move, shift DST and DST_END by it.  */
 966 #define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \
 967   do {                                                                  \
 968     EMACS_INT offset;                                                   \
 969                                                                         \
 970     charset_map_loaded = 0;                                             \
 971     result = CHAR_CHARSET_P (c, charset);                               \
 972     if (charset_map_loaded                                              \
 973         && (offset = coding_set_destination (coding)))                  \
 974       {                                                                 \
 975         dst += offset;                                                  \
 976         dst_end += offset;                                              \
 977       }                                                                 \
 978   } while (0)
 979
 980
 981 /* Unless more than BYTES bytes of room remain at dst, enlarge
 982    coding->destination and update dst and dst_end.  We don't have to
 983    take care of coding->source, which may be relocated; that is
 984    handled by calling coding_set_source in encode_coding.  */
 985 /* The enlargement asks for BYTES plus one byte per charbuf element not yet encoded (charbuf_end - charbuf).  */
 986 #define ASSURE_DESTINATION(bytes)                               \
 987   do {                                                          \
 988     if (dst + (bytes) >= dst_end)                               \
 989       {                                                         \
 990         EMACS_INT more_bytes = charbuf_end - charbuf + (bytes); \
 991                                                                 \
 992         dst = alloc_destination (coding, more_bytes, dst);      \
 993         dst_end = coding->destination + coding->dst_bytes;      \
 994       }                                                         \
 995   } while (0)
 996
 997
 998 /* Store the multibyte form of the character C in P, and advance P to
 999    the end of the multibyte form.  This is like CHAR_STRING_ADVANCE but
1000    it never calls MAYBE_UNIFY_CHAR.  */
1001 /* C and P are evaluated several times, so they must be free of side effects; every use of C is parenthesized so that C may be an expression.  */
1002 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
1003   do {                                          \
1004     if ((c) <= MAX_1_BYTE_CHAR)                 \
1005       *(p)++ = (c);                             \
1006     else if ((c) <= MAX_2_BYTE_CHAR)            \
1007       *(p)++ = (0xC0 | ((c) >> 6)),             \
1008         *(p)++ = (0x80 | ((c) & 0x3F));         \
1009     else if ((c) <= MAX_3_BYTE_CHAR)            \
1010       *(p)++ = (0xE0 | ((c) >> 12)),            \
1011         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1012         *(p)++ = (0x80 | ((c) & 0x3F));         \
1013     else if ((c) <= MAX_4_BYTE_CHAR)            \
1014       *(p)++ = (0xF0 | ((c) >> 18)),            \
1015         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)), \
1016         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1017         *(p)++ = (0x80 | ((c) & 0x3F));         \
1018     else if ((c) <= MAX_5_BYTE_CHAR)            \
1019       *(p)++ = 0xF8,                            \
1020         *(p)++ = (0x80 | (((c) >> 18) & 0x0F)), \
1021         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)), \
1022         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1023         *(p)++ = (0x80 | ((c) & 0x3F));         \
1024     else                                        \
1025       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1026   } while (0)
1027
1028
1029 /* Return the character code of the character whose multibyte form is
1030    at P, and advance P to the end of the multibyte form.  This is like
1031    STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.  */
1032 /* P is evaluated several times and is assigned to, so it must be a side-effect-free lvalue.  The length (1..5 bytes) is chosen from the high bits of the first byte.  */
1033 #define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
1034   (!((p)[0] & 0x80)                                             \
1035    ? *(p)++                                                     \
1036    : ! ((p)[0] & 0x20)                                          \
1037    ? ((p) += 2,                                                 \
1038       ((((p)[-2] & 0x1F) << 6)                                  \
1039        | ((p)[-1] & 0x3F)                                       \
1040        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
1041    : ! ((p)[0] & 0x10)                                          \
1042    ? ((p) += 3,                                                 \
1043       ((((p)[-3] & 0x0F) << 12)                                 \
1044        | (((p)[-2] & 0x3F) << 6)                                \
1045        | ((p)[-1] & 0x3F)))                                     \
1046    : ! ((p)[0] & 0x08)                                          \
1047    ? ((p) += 4,                                                 \
1048       ((((p)[-4] & 0xF) << 18)                                  \
1049        | (((p)[-3] & 0x3F) << 12)                               \
1050        | (((p)[-2] & 0x3F) << 6)                                \
1051        | ((p)[-1] & 0x3F)))                                     \
1052    : ((p) += 5,                                                 \
1053       ((((p)[-4] & 0x3F) << 18)                                 \
1054        | (((p)[-3] & 0x3F) << 12)                               \
1055        | (((p)[-2] & 0x3F) << 6)                                \
1056        | ((p)[-1] & 0x3F))))
1057
1058
1059 /* Recompute coding->source from coding->src_object, and return by how
1060    many bytes coding->source moved.  */
1061
1062 static EMACS_INT
1063 coding_set_source (struct coding_system *coding)
1064 {
1065   const unsigned char *previous = coding->source;
1066
1067   if (STRINGP (coding->src_object))
1068     {
1069       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1070     }
1071   else if (BUFFERP (coding->src_object))
1072     {
1073       struct buffer *b = XBUFFER (coding->src_object);
1074
1075       /* A negative src_pos makes src_pos_byte an offset from the end
1076          of the gap instead of a byte position in the buffer.  */
1077       coding->source = (coding->src_pos < 0
1078                         ? BUF_GAP_END_ADDR (b) + coding->src_pos_byte
1079                         : BUF_BYTE_ADDRESS (b, coding->src_pos_byte));
1080     }
1081
1082   /* Any other source is a C string.  It is never relocated
1083      automatically, so coding->source is left as it is.  */
1084
1085   return coding->source - previous;
1086 }
1087
1088
1089 /* Update coding->destination (and coding->dst_bytes) from coding->dst_object
1090    when that is a buffer, and return by how many bytes coding->destination moved.  */
1091
1092 static EMACS_INT
1093 coding_set_destination (struct coding_system *coding)
1094 {
1095   const unsigned char *orig = coding->destination;
1096
1097   if (BUFFERP (coding->dst_object))
1098     {
1099       if (BUFFERP (coding->src_object) && coding->src_pos < 0)
1100         {  /* NOTE(review): BEG_ADDR and GAP_END_ADDR take no buffer argument; presumably dst_object is the current buffer here -- confirm.  */
1101           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1102           coding->dst_bytes = (GAP_END_ADDR
1103                                - (coding->src_bytes - coding->consumed)  /* Leave out the source bytes not yet consumed.  */
1104                                - coding->destination);
1105         }
1106       else
1107         {
1108           /* We are sure that coding->dst_pos_byte is before the gap
1109              of the buffer. */
1110           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1111                                  + coding->dst_pos_byte - BEG_BYTE);
1112           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1113                                - coding->destination);
1114         }
1115     }
1116   else
1117     {
1118       /* Otherwise, the destination is C string and is never relocated
1119          automatically.  Thus we don't have to update anything.  */
1120     }
1121   return coding->destination - orig;
1122 }
1123 /* Grow coding->destination by BYTES bytes with xrealloc and add BYTES to coding->dst_bytes; call string_overflow if the size would pass STRING_BYTES_BOUND.  */
1124 static void
1125 coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
1126 {
1127   EMACS_INT enlarged;
1128   if (bytes > STRING_BYTES_BOUND - coding->dst_bytes)
1129     string_overflow ();
1130   enlarged = coding->dst_bytes + bytes;
1131   coding->destination = (unsigned char *) xrealloc (coding->destination, enlarged);
1132   coding->dst_bytes = enlarged;
1133 }
1134 /* Call make_gap (BYTES) on the destination buffer of CODING.  GAP_HEAD_USED is the number of bytes at the head of the gap that already hold produced data.  */
1135 static void
1136 coding_alloc_by_making_gap (struct coding_system *coding,
1137                             EMACS_INT gap_head_used, EMACS_INT bytes)
1138 {
1139   if (EQ (coding->src_object, coding->dst_object))
1140     {
1141       /* The gap may contain the produced data at the head and not-yet
1142          consumed data at the tail.  To preserve those data, we at
1143          first make the gap size to zero, then increase the gap
1144          size.  */
1145       EMACS_INT add = GAP_SIZE;
1146
1147       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1148       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1149       make_gap (bytes);
1150       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;  /* Undo the temporary accounting.  */
1151       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1152     }
1153   else
1154     {
1155       Lisp_Object this_buffer;
1156
1157       this_buffer = Fcurrent_buffer ();
1158       set_buffer_internal (XBUFFER (coding->dst_object));  /* make_gap takes no buffer argument, so switch buffers around the call.  */
1159       make_gap (bytes);
1160       set_buffer_internal (XBUFFER (this_buffer));
1161     }
1162 }
1163
1164 /* Enlarge the destination of CODING by NBYTES bytes.  DST points into the old destination; return the pointer at the same offset in the new one.  */
1165 static unsigned char *
1166 alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1167                    unsigned char *dst)
1168 {
1169   /* Keep DST as an offset, because the destination may move.  */
1170   EMACS_INT produced = dst - coding->destination;
1171
1172   if (! BUFFERP (coding->dst_object))
1173     coding_alloc_by_realloc (coding, nbytes);
1174   else
1175     {
1176       EMACS_INT gap_head_used
1177         = dst - BUF_GPT_ADDR (XBUFFER (coding->dst_object));
1178       coding_alloc_by_making_gap (coding, gap_head_used, nbytes);
1179     }
1180   coding_set_destination (coding);
1181   return coding->destination + produced;
1182 }
1183
1184 /** Macros for annotations.  */
1185
1186 /* An annotation data is stored in the array coding->charbuf in this
1187    format:
1188      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1189    LENGTH is the number of elements in the annotation.
1190    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1191    NCHARS is the number of characters in the text annotated.
1192
1193    The format of the following elements depends on ANNOTATION_MASK.
1194
1195    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1196    follow:
1197      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1198
1199    NBYTES is the number of bytes specified in the header part of
1200    old-style emacs-mule encoding, or 0 for the other kind of
1201    composition.
1202
1203    METHOD is one of enum composition_method.
1204
1205    Optional COMPOSITION-COMPONENTS are characters and composition
1206    rules.
1207
1208    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1209    follows.
1210
1211    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1212    recover from an invalid annotation, and should be skipped by
1213    produce_annotation.  */
1214
1215 /* Maximum length of the header of annotation data, in charbuf elements (ADD_COMPOSITION_DATA writes a five-element header).  */
1216 #define MAX_ANNOTATION_LENGTH 5
1217 /* Store the annotation header [ -LEN MASK NCHARS ] at BUF, advance BUF past it, and set coding->annotated.  `coding' must be in scope at the call.  No semicolon follows `while (0)', so a call is a single statement.  */
1218 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1219   do {                                                  \
1220     *(buf)++ = -(len);                                  \
1221     *(buf)++ = (mask);                                  \
1222     *(buf)++ = (nchars);                                \
1223     coding->annotated = 1;                              \
1224   } while (0)
1225 /* Store a composition annotation at BUF: the common header with length 5, then NBYTES and METHOD.  BUF is advanced past the five elements.  */
1226 #define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
1227   do {                                                                      \
1228     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1229     *(buf)++ = (nbytes);                                                    \
1230     *(buf)++ = (method);                                                    \
1231   } while (0)
1232
1233 /* Store a charset annotation at BUF: the common header with length 4, then the charset ID.  BUF is advanced past the four elements.  */
1234 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1235   do {                                                                  \
1236     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1237     *(buf)++ = (id);                                                    \
1238   } while (0)
1239
1240 \f
1241 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1242
1243
1244
1245 \f
1246 /*** 3. UTF-8 ***/
1247
1248 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1249    detect_coding_utf_8 checks if a text is encoded in UTF-8.  If it
1250    is, it returns 1, else it returns 0.  */
1251 /* Predicates classifying one octet C by its high bits.  */
1252 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1253 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)  /* 10xxxxxx */
1254 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)  /* 110xxxxx */
1255 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)  /* 1110xxxx */
1256 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)  /* 11110xxx */
1257 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)  /* 111110xx */
1258 /* The three octets EF BB BF are the UTF-8 encoding of U+FEFF, used as the byte order mark.  */
1259 #define UTF_8_BOM_1 0xEF
1260 #define UTF_8_BOM_2 0xBB
1261 #define UTF_8_BOM_3 0xBF
1262 /* Scan the source of CODING for UTF-8 well-formedness and record the verdict in DETECT_INFO.  Return 0 after rejecting the UTF-8 categories, else 1.  */
1263 static int
1264 detect_coding_utf_8 (struct coding_system *coding,
1265                      struct coding_detection_info *detect_info)
1266 {
1267   const unsigned char *src = coding->source, *src_base;
1268   const unsigned char *src_end = coding->source + coding->src_bytes;
1269   int multibytep = coding->src_multibyte;
1270   EMACS_INT consumed_chars = 0;  /* Not read in the visible code; presumably updated by ONE_MORE_BYTE -- confirm.  */
1271   int bom_found = 0;  /* Set when the source starts with EF BB BF.  */
1272   int found = 0;  /* Set when a well-formed multi-octet sequence was seen.  */
1273
1274   detect_info->checked |= CATEGORY_MASK_UTF_8;
1275   /* A coding system of this category is always ASCII compatible.  */
1276   src += coding->head_ascii;
1277
1278   while (1)
1279     {
1280       int c, c1, c2, c3, c4;
1281
1282       src_base = src;  /* Start of the sequence being examined.  */
1283       ONE_MORE_BYTE (c);
1284       if (c < 0 || UTF_8_1_OCTET_P (c))
1285         continue;
1286       ONE_MORE_BYTE (c1);
1287       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1288         break;
1289       if (UTF_8_2_OCTET_LEADING_P (c))
1290         {
1291           found = 1;
1292           continue;
1293         }
1294       ONE_MORE_BYTE (c2);
1295       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1296         break;
1297       if (UTF_8_3_OCTET_LEADING_P (c))
1298         {
1299           found = 1;
1300           if (src_base == coding->source  /* Only a sequence at the very start can be a BOM.  */
1301               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1302             bom_found = 1;
1303           continue;
1304         }
1305       ONE_MORE_BYTE (c3);
1306       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1307         break;
1308       if (UTF_8_4_OCTET_LEADING_P (c))
1309         {
1310           found = 1;
1311           continue;
1312         }
1313       ONE_MORE_BYTE (c4);
1314       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1315         break;
1316       if (UTF_8_5_OCTET_LEADING_P (c))
1317         {
1318           found = 1;
1319           continue;
1320         }
1321       break;
1322     }
1323   detect_info->rejected |= CATEGORY_MASK_UTF_8;  /* Reached only by `break': a malformed sequence was seen.  */
1324   return 0;
1325
1326  no_more_source:
1327   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)  /* A sequence is cut short at the end of the last block.  */
1328     {
1329       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1330       return 0;
1331     }
1332   if (bom_found)
1333     {
1334       /* A leading U+FEFF doesn't necessarily mean a BOM, so both the signature and no-signature categories are reported.  */
1335       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1336     }
1337   else
1338     {
1339       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1340       if (found)
1341         detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1342     }
1343   return 1;
1344 }
1345
1346 /* Decode the UTF-8 source of CODING into character codes appended to CODING->charbuf, then update CODING->consumed, consumed_char and charbuf_used.  */
1347 static void
1348 decode_coding_utf_8 (struct coding_system *coding)
1349 {
1350   const unsigned char *src = coding->source + coding->consumed;
1351   const unsigned char *src_end = coding->source + coding->src_bytes;
1352   const unsigned char *src_base;
1353   int *charbuf = coding->charbuf + coding->charbuf_used;
1354   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1355   EMACS_INT consumed_chars = 0, consumed_chars_base = 0;
1356   int multibytep = coding->src_multibyte;
1357   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1358   int eol_dos =
1359     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1360   int byte_after_cr = -1;  /* Byte pre-read after a CR, or -1 if none is pending.  */
1361   /* NOTE(review): the BOM probe below rewinds src on a mismatch but not consumed_chars; confirm whether ONE_MORE_BYTE advances consumed_chars.  */
1362   if (bom != utf_without_bom)
1363     {
1364       int c1, c2, c3;
1365
1366       src_base = src;
1367       ONE_MORE_BYTE (c1);
1368       if (! UTF_8_3_OCTET_LEADING_P (c1))
1369         src = src_base;
1370       else
1371         {
1372           ONE_MORE_BYTE (c2);
1373           if (! UTF_8_EXTRA_OCTET_P (c2))
1374             src = src_base;
1375           else
1376             {
1377               ONE_MORE_BYTE (c3);
1378               if (! UTF_8_EXTRA_OCTET_P (c3))
1379                 src = src_base;
1380               else
1381                 {
1382                   if ((c1 != UTF_8_BOM_1)
1383                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1384                     src = src_base;
1385                   else
1386                     CODING_UTF_8_BOM (coding) = utf_without_bom;  /* BOM skipped: src stays past it.  */
1387                 }
1388             }
1389         }
1390     }
1391   CODING_UTF_8_BOM (coding) = utf_without_bom;  /* Unconditional, so the BOM is probed at most once per coding.  */
1392
1393   while (1)
1394     {
1395       int c, c1, c2, c3, c4, c5;
1396
1397       src_base = src;
1398       consumed_chars_base = consumed_chars;
1399
1400       if (charbuf >= charbuf_end)
1401         {
1402           if (byte_after_cr >= 0)
1403             src_base--;  /* Give back the pre-read byte so that it is not counted as consumed.  */
1404           break;
1405         }
1406
1407       if (byte_after_cr >= 0)
1408         c1 = byte_after_cr, byte_after_cr = -1;
1409       else
1410         ONE_MORE_BYTE (c1);
1411       if (c1 < 0)
1412         {
1413           c = - c1;  /* Presumably ONE_MORE_BYTE delivers some input as a negated value -- confirm against its definition.  */
1414         }
1415       else if (UTF_8_1_OCTET_P (c1))
1416         {
1417           if (eol_dos && c1 == '\r')
1418             ONE_MORE_BYTE (byte_after_cr);
1419           c = c1;
1420         }
1421       else
1422         {
1423           ONE_MORE_BYTE (c2);
1424           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1425             goto invalid_code;
1426           if (UTF_8_2_OCTET_LEADING_P (c1))
1427             {
1428               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1429               /* Reject overlong sequences here and below.  Encoders
1430                  producing them are incorrect, they can be misleading,
1431                  and they mess up read/write invariance.  */
1432               if (c < 128)
1433                 goto invalid_code;
1434             }
1435           else
1436             {
1437               ONE_MORE_BYTE (c3);
1438               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1439                 goto invalid_code;
1440               if (UTF_8_3_OCTET_LEADING_P (c1))
1441                 {
1442                   c = (((c1 & 0xF) << 12)
1443                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1444                   if (c < 0x800
1445                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1446                     goto invalid_code;
1447                 }
1448               else
1449                 {
1450                   ONE_MORE_BYTE (c4);
1451                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1452                     goto invalid_code;
1453                   if (UTF_8_4_OCTET_LEADING_P (c1))
1454                     {
1455                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1456                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1457                     if (c < 0x10000)  /* Overlong four-octet form.  */
1458                       goto invalid_code;
1459                     }
1460                   else
1461                     {
1462                       ONE_MORE_BYTE (c5);
1463                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1464                         goto invalid_code;
1465                       if (UTF_8_5_OCTET_LEADING_P (c1))
1466                         {
1467                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1468                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1469                                | (c5 & 0x3F));
1470                           if ((c > MAX_CHAR) || (c < 0x200000))  /* Out of range, or overlong five-octet form.  */
1471                             goto invalid_code;
1472                         }
1473                       else
1474                         goto invalid_code;
1475                     }
1476                 }
1477             }
1478         }
1479
1480       *charbuf++ = c;
1481       continue;
1482
1483     invalid_code:  /* Rewind to the start of the bad sequence and emit its first byte alone; a byte that is not ASCII goes through BYTE8_TO_CHAR.  */
1484       src = src_base;
1485       consumed_chars = consumed_chars_base;
1486       ONE_MORE_BYTE (c);
1487       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1488       coding->errors++;
1489     }
1490
1491  no_more_source:  /* Account only for input up to src_base, the start of the last sequence begun.  */
1492   coding->consumed_char += consumed_chars_base;
1493   coding->consumed = src_base - coding->source;
1494   coding->charbuf_used = charbuf - coding->charbuf;
1495 }
1496
1497
/* Encode the CODING->charbuf_used characters held in CODING->charbuf
   as UTF-8, writing to CODING->destination starting at offset
   CODING->produced.

   If CODING_UTF_8_BOM (CODING) is utf_with_bom, the three BOM bytes
   are emitted first and the setting is changed to utf_without_bom.
   A character for which CHAR_BYTE8_P holds is written as the single
   byte given by CHAR_TO_BYTE8; every other character goes through
   CHAR_STRING_ADVANCE_NO_UNIFY.

   Before returning 0, record CODING_RESULT_SUCCESS and update
   CODING->produced_char and CODING->produced.

   NOTE(review): ASSURE_DESTINATION and the EMIT_* macros are defined
   elsewhere; they presumably use DST, DST_END, MULTIBYTEP and
   PRODUCED_CHARS and may leave this function early when the
   destination is too small -- confirm against their definitions.  */
1498 static int
1499 encode_coding_utf_8 (struct coding_system *coding)
1500 {
1501   int multibytep = coding->dst_multibyte;
1502   int *charbuf = coding->charbuf;
1503   int *charbuf_end = charbuf + coding->charbuf_used;
1504   unsigned char *dst = coding->destination + coding->produced;
1505   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1506   EMACS_INT produced_chars = 0;
1507   int c;
1508
1509   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1510     {
1511       ASSURE_DESTINATION (3);
1512       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
         /* The BOM has been written; switch the setting off.  */
1513       CODING_UTF_8_BOM (coding) = utf_without_bom;
1514     }
1515
1516   if (multibytep)
1517     {
1518       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1519
1520       while (charbuf < charbuf_end)
1521         {
             /* STR is a scratch buffer for one character; PEND ends up
                one past the last byte stored in it.  */
1522           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1523
1524           ASSURE_DESTINATION (safe_room);
1525           c = *charbuf++;
1526           if (CHAR_BYTE8_P (c))
1527             {
1528               c = CHAR_TO_BYTE8 (c);
1529               EMIT_ONE_BYTE (c);
1530             }
1531           else
1532             {
                 /* Build the byte sequence in STR, then emit it byte
                    by byte through EMIT_ONE_BYTE.  */
1533               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1534               for (p = str; p < pend; p++)
1535                 EMIT_ONE_BYTE (*p);
1536             }
1537         }
1538     }
1539   else
1540     {
1541       int safe_room = MAX_MULTIBYTE_LENGTH;
1542
1543       while (charbuf < charbuf_end)
1544         {
1545           ASSURE_DESTINATION (safe_room);
1546           c = *charbuf++;
             /* Here the bytes are stored through DST directly, so
                PRODUCED_CHARS is counted by hand below.  */
1547           if (CHAR_BYTE8_P (c))
1548             *dst++ = CHAR_TO_BYTE8 (c);
1549           else
1550             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1551           produced_chars++;
1552         }
1553     }
1554   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1555   coding->produced_char += produced_chars;
1556   coding->produced = dst - coding->destination;
1557   return 0;
1558 }
1559
1560
1561 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1562    Check if a text is encoded in one of UTF-16 based coding systems.
1563    If it is, return 1, else return 0.  */
1564
/* Nonzero if bits 10..15 of VAL match those of 0xD800, i.e. the low
   16 bits of VAL are in the range 0xD800..0xDBFF.  Bits above bit 15
   are masked out and therefore ignored.  */
1565 #define UTF_16_HIGH_SURROGATE_P(val) \
1566   (((val) & 0xFC00) == 0xD800)
1567
/* Nonzero if bits 10..15 of VAL match those of 0xDC00, i.e. the low
   16 bits of VAL are in the range 0xDC00..0xDFFF.  */
1568 #define UTF_16_LOW_SURROGATE_P(val) \
1569   (((val) & 0xFC00) == 0xDC00)
1570
1571
/* Examine the bytes of CODING->source and update DETECT_INFO for the
   UTF-16 categories.

   CATEGORY_MASK_UTF_16 is always added to DETECT_INFO->checked.  If
   this is the last block and CODING->src_chars is odd, every UTF-16
   category is rejected and 0 is returned.

   Otherwise the first two bytes are inspected:
     FF FE  -> UTF_16_LE and UTF_16_AUTO are found; UTF_16_BE and both
               NOSIG categories are rejected.
     FE FF  -> UTF_16_BE and UTF_16_AUTO are found; UTF_16_LE and both
               NOSIG categories are rejected.
     C2 < 0 -> every UTF-16 category is rejected; return 0.
     other  -> the categories that need a signature (AUTO, BE, LE) are
               rejected, and the remaining byte pairs are scanned to
               decide whether the NOSIG categories are still plausible.

   Return 1 when control reaches no_more_source (after a BOM was
   recognized, or presumably when TWO_MORE_BYTES runs out of input),
   and 0 from the explicit returns above.

   NOTE(review): TWO_MORE_BYTES is defined elsewhere; it presumably
   jumps to no_more_source on short input and leaves C2 negative when
   it cannot read a plain byte pair -- confirm.  */
1572 static int
1573 detect_coding_utf_16 (struct coding_system *coding,
1574                       struct coding_detection_info *detect_info)
1575 {
1576   const unsigned char *src = coding->source;
1577   const unsigned char *src_end = coding->source + coding->src_bytes;
1578   int multibytep = coding->src_multibyte;
1579   int c1, c2;
1580
1581   detect_info->checked |= CATEGORY_MASK_UTF_16;
1582   if (coding->mode & CODING_MODE_LAST_BLOCK
1583       && (coding->src_chars & 1))
1584     {
1585       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1586       return 0;
1587     }
1588
1589   TWO_MORE_BYTES (c1, c2);
1590   if ((c1 == 0xFF) && (c2 == 0xFE))
1591     {
1592       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1593                              | CATEGORY_MASK_UTF_16_AUTO);
1594       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1595                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1596                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1597     }
1598   else if ((c1 == 0xFE) && (c2 == 0xFF))
1599     {
1600       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1601                              | CATEGORY_MASK_UTF_16_AUTO);
1602       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1603                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1604                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1605     }
1606   else if (c2 < 0)
1607     {
1608       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1609       return 0;
1610     }
1611   else
1612     {
1613       /* We check the dispersion of Eth and Oth bytes where E is even and
1614          O is odd.  If both are high, we assume binary data.  E[B] and
              O[B] record whether byte value B has been seen as the first
              or the second byte of a pair; E_NUM and O_NUM count the
              distinct values seen so far (the first pair is counted by
              the initializers).  */
1615       unsigned char e[256], o[256];
1616       unsigned e_num = 1, o_num = 1;
1617
1618       memset (e, 0, 256);
1619       memset (o, 0, 256);
1620       e[c1] = 1;
1621       o[c2] = 1;
1622
         /* No BOM was seen, so the categories that need one are out.  */
1623       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1624                                 |CATEGORY_MASK_UTF_16_BE
1625                                 | CATEGORY_MASK_UTF_16_LE);
1626
         /* Keep scanning until every UTF-16 category is rejected.  */
1627       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1628              != CATEGORY_MASK_UTF_16)
1629         {
1630           TWO_MORE_BYTES (c1, c2);
1631           if (c2 < 0)
1632             break;
1633           if (! e[c1])
1634             {
1635               e[c1] = 1;
1636               e_num++;
                 /* 128 or more distinct first bytes: reject BE_NOSIG.  */
1637               if (e_num >= 128)
1638                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1639             }
1640           if (! o[c2])
1641             {
1642               o[c2] = 1;
1643               o_num++;
                 /* 128 or more distinct second bytes: reject LE_NOSIG.  */
1644               if (o_num >= 128)
1645                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1646             }
1647         }
1648       return 0;
1649     }
1650
1651  no_more_source:
1652   return 1;
1653 }
1654
/* Decode UTF-16 bytes of CODING->source, starting at offset
   CODING->consumed, and append the resulting characters to
   CODING->charbuf after the CODING->charbuf_used elements already
   there.

   Byte order, BOM handling and a possibly pending high surrogate are
   taken from CODING_UTF_16_ENDIAN, CODING_UTF_16_BOM and
   CODING_UTF_16_SURROGATE.  On exit CODING->consumed_char,
   CODING->consumed and CODING->charbuf_used are updated from
   CONSUMED_CHARS_BASE, SRC_BASE and CHARBUF.

   NOTE(review): ONE_MORE_BYTE is defined elsewhere; it presumably
   jumps to no_more_source when the input is exhausted and may yield a
   negative value for a source element that is not a plain byte --
   confirm.  */
1655 static void
1656 decode_coding_utf_16 (struct coding_system *coding)
1657 {
1658   const unsigned char *src = coding->source + coding->consumed;
1659   const unsigned char *src_end = coding->source + coding->src_bytes;
1660   const unsigned char *src_base;
1661   int *charbuf = coding->charbuf + coding->charbuf_used;
1662   /* We may produce at most 3 chars in one loop.  */
1663   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1664   EMACS_INT consumed_chars = 0, consumed_chars_base = 0;
1665   int multibytep = coding->src_multibyte;
1666   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1667   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1668   int surrogate = CODING_UTF_16_SURROGATE (coding);
1669   int eol_dos =
1670     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
     /* Two bytes read ahead after a CR when EOL_DOS is set; -1 means
        nothing is pending.  */
1671   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1672
1673   if (bom == utf_with_bom)
1674     {
1675       int c, c1, c2;
1676
1677       src_base = src;
1678       ONE_MORE_BYTE (c1);
1679       ONE_MORE_BYTE (c2);
1680       c = (c1 << 8) | c2;
1681
         /* C was assembled big-endian, so a little-endian BOM
            (FF FE) shows up here as 0xFFFE.  */
1682       if (endian == utf_16_big_endian
1683           ? c != 0xFEFF : c != 0xFFFE)
1684         {
1685           /* The first two bytes are not BOM.  Treat them as bytes
1686              for a normal character.  */
             /* NOTE(review): SRC is rewound but CONSUMED_CHARS is not;
                confirm whether ONE_MORE_BYTE advanced it.  */
1687           src = src_base;
1688           coding->errors++;
1689         }
1690       CODING_UTF_16_BOM (coding) = utf_without_bom;
1691     }
1692   else if (bom == utf_detect_bom)
1693     {
1694       /* We have already tried to detect BOM and failed in
1695          detect_coding.  */
1696       CODING_UTF_16_BOM (coding) = utf_without_bom;
1697     }
1698
1699   while (1)
1700     {
1701       int c, c1, c2;
1702
1703       src_base = src;
1704       consumed_chars_base = consumed_chars;
1705
1706       if (charbuf >= charbuf_end)
1707         {
             /* The two look-ahead bytes were read but not decoded;
                step SRC_BASE back so they are not reported as
                consumed.  */
1708           if (byte_after_cr1 >= 0)
1709             src_base -= 2;
1710           break;
1711         }
1712
         /* Use a pending look-ahead byte before reading new input.  */
1713       if (byte_after_cr1 >= 0)
1714         c1 = byte_after_cr1, byte_after_cr1 = -1;
1715       else
1716         ONE_MORE_BYTE (c1);
1717       if (c1 < 0)
1718         {
             /* A negative value is stored negated as one element.  */
1719           *charbuf++ = -c1;
1720           continue;
1721         }
1722       if (byte_after_cr2 >= 0)
1723         c2 = byte_after_cr2, byte_after_cr2 = -1;
1724       else
1725         ONE_MORE_BYTE (c2);
1726       if (c2 < 0)
1727         {
             /* C1 cannot be paired; store it alone (as a raw byte
                character unless it is ASCII), followed by -C2.  */
1728           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1729           *charbuf++ = -c2;
1730           continue;
1731         }
1732       c = (endian == utf_16_big_endian
1733            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1734
1735       if (surrogate)
1736         {
             /* A high surrogate is pending in SURROGATE.  */
1737           if (! UTF_16_LOW_SURROGATE_P (c))
1738             {
                 /* Not followed by a low surrogate: store the two
                    bytes of the pending surrogate, in source byte
                    order, as separate elements and count an error.  */
1739               if (endian == utf_16_big_endian)
1740                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1741               else
1742                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1743               *charbuf++ = c1;
1744               *charbuf++ = c2;
1745               coding->errors++;
                 /* C either becomes the new pending high surrogate
                    or is stored as a third element.  */
1746               if (UTF_16_HIGH_SURROGATE_P (c))
1747                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1748               else
1749                 *charbuf++ = c;
1750             }
1751           else
1752             {
                 /* Combine the surrogate pair into one code point.  */
1753               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1754               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1755               *charbuf++ = 0x10000 + c;
1756             }
1757         }
1758       else
1759         {
1760           if (UTF_16_HIGH_SURROGATE_P (c))
1761             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1762           else
1763             {
1764               if (eol_dos && c == '\r')
1765                 {
                     /* Read the next two bytes ahead; they are used
                        at the top of the next iteration.  */
1766                   ONE_MORE_BYTE (byte_after_cr1);
1767                   ONE_MORE_BYTE (byte_after_cr2);
1768                 }
1769               *charbuf++ = c;
1770             }
1771         }
1772     }
1773
1774  no_more_source:
1775   coding->consumed_char += consumed_chars_base;
1776   coding->consumed = src_base - coding->source;
1777   coding->charbuf_used = charbuf - coding->charbuf;
1778 }
1779
/* Encode the CODING->charbuf_used characters held in CODING->charbuf
   as UTF-16, writing to CODING->destination starting at offset
   CODING->produced.

   Unless CODING_UTF_16_BOM (CODING) is utf_without_bom, a BOM is
   emitted first (FE FF for big endian, FF FE otherwise) and the
   setting is changed to utf_without_bom.  A character above
   MAX_UNICODE_CHAR is replaced by CODING->default_char.  Characters
   below 0x10000 are emitted as two bytes, all others as a surrogate
   pair of four bytes.

   Before returning 0, record CODING_RESULT_SUCCESS and update
   CODING->produced and CODING->produced_char.

   NOTE(review): ASSURE_DESTINATION and the EMIT_* macros are defined
   elsewhere; they presumably use DST, DST_END, MULTIBYTEP and
   PRODUCED_CHARS and may leave this function early -- confirm.  */
1780 static int
1781 encode_coding_utf_16 (struct coding_system *coding)
1782 {
1783   int multibytep = coding->dst_multibyte;
1784   int *charbuf = coding->charbuf;
1785   int *charbuf_end = charbuf + coding->charbuf_used;
1786   unsigned char *dst = coding->destination + coding->produced;
1787   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1788   int safe_room = 8;
1789   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1790   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1791   EMACS_INT produced_chars = 0;
1792   int c;
1793
1794   if (bom != utf_without_bom)
1795     {
1796       ASSURE_DESTINATION (safe_room);
1797       if (big_endian)
1798         EMIT_TWO_BYTES (0xFE, 0xFF);
1799       else
1800         EMIT_TWO_BYTES (0xFF, 0xFE);
         /* The BOM has been written; switch the setting off.  */
1801       CODING_UTF_16_BOM (coding) = utf_without_bom;
1802     }
1803
1804   while (charbuf < charbuf_end)
1805     {
1806       ASSURE_DESTINATION (safe_room);
1807       c = *charbuf++;
1808       if (c > MAX_UNICODE_CHAR)
1809         c = coding->default_char;
1810
1811       if (c < 0x10000)
1812         {
1813           if (big_endian)
1814             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1815           else
1816             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1817         }
1818       else
1819         {
             /* C1 is the high surrogate, C2 the low surrogate.  */
1820           int c1, c2;
1821
1822           c -= 0x10000;
1823           c1 = (c >> 10) + 0xD800;
1824           c2 = (c & 0x3FF) + 0xDC00;
1825           if (big_endian)
1826             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1827           else
1828             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1829         }
1830     }
1831   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1832   coding->produced = dst - coding->destination;
1833   coding->produced_char += produced_chars;
1834   return 0;
1835 }
1836
1837 \f
1838 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1839
1840 /* Emacs' internal format for representation of multiple character
1841    sets is a kind of multi-byte encoding, i.e. characters are
1842    represented by variable-length sequences of one-byte codes.
1843
1844    ASCII characters and control characters (e.g. `tab', `newline') are
1845    represented by one-byte sequences which are their ASCII codes, in
1846    the range 0x00 through 0x7F.
1847
1848    8-bit characters of the range 0x80..0x9F are represented by
1849    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1850    code + 0x20).
1851
1852    8-bit characters of the range 0xA0..0xFF are represented by
1853    one-byte sequences which are their 8-bit code.
1854
1855    The other characters are represented by a sequence of `base
1856    leading-code', optional `extended leading-code', and one or two
1857    `position-code's.  The length of the sequence is determined by the
1858    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1859    whereas extended leading-code and position-code take the range 0xA0
1860    through 0xFF.  See `charset.h' for more details about leading-code
1861    and position-code.
1862
1863    --- CODE RANGE of Emacs' internal format ---
1864    character set        range
1865    -------------        -----
1866    ascii                0x00..0x7F
1867    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1868    eight-bit-graphic    0xA0..0xBF
1869    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1870    ---------------------------------------------
1871
1872    As this is the internal character representation, the format is
1873    usually not used externally (i.e. in a file or in a data sent to a
1874    process).  But, it is possible to have a text externally in this
1875    format (i.e. by encoding by the coding system `emacs-mule').
1876
1877    In that case, a sequence of one-byte codes has a slightly different
1878    form.
1879
1880    At first, all characters in eight-bit-control are represented by
1881    one-byte sequences which are their 8-bit code.
1882
1883    Next, character composition data are represented by the byte
1884    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1885    where,
1886         METHOD is 0xF2 plus one of composition method (enum
1887         composition_method),
1888
1889         BYTES is 0xA0 plus a byte length of this composition data,
1890
1891         CHARS is 0xA0 plus a number of characters composed by this
1892         data,
1893
1894         COMPONENTs are characters of multibyte form or composition
1895         rules encoded by two-byte of ASCII codes.
1896
1897    In addition, for backward compatibility, the following formats are
1898    also recognized as composition data on decoding.
1899
1900    0x80 MSEQ ...
1901    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1902
1903    Here,
1904         MSEQ is a multibyte form but in these special format:
1905           ASCII: 0xA0 ASCII_CODE+0x80,
1906           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1907         RULE is a one byte code of the range 0xA0..0xF0 that
1908         represents a composition rule.
1909   */
1910
/* Table indexed by a byte value.  The emacs-mule routines in this
   file read emacs_mule_bytes[C] as the total byte length of the
   sequence that starts with byte C (emacs_mule_char handles the
   values 1 through 4 and aborts on anything else).  The table is
   filled in outside this part of the file.  */
1911 char emacs_mule_bytes[256];
1912
1913
1914 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1915    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1916    else return 0.

   CATEGORY_MASK_EMACS_MULE is always added to DETECT_INFO->checked.
   Scanning starts after the first CODING->head_ascii bytes.

   The category is added to DETECT_INFO->rejected and 0 is returned
   when the scan meets ESC, SI or SO, a run following 0x80 that is too
   short (src - src_start <= 4), or a multibyte sequence with fewer
   trailing bytes >= 0xA0 than emacs_mule_bytes requires.

   When control reaches no_more_source, the category is likewise
   rejected if this is the last block and a sequence was left
   unfinished (src_base < src); otherwise FOUND is merged into
   DETECT_INFO->found and 1 is returned.

   NOTE(review): ONE_MORE_BYTE is defined elsewhere; it presumably
   jumps to no_more_source when the input is exhausted -- confirm.  */
1917
1918 static int
1919 detect_coding_emacs_mule (struct coding_system *coding,
1920                           struct coding_detection_info *detect_info)
1921 {
1922   const unsigned char *src = coding->source, *src_base;
1923   const unsigned char *src_end = coding->source + coding->src_bytes;
1924   int multibytep = coding->src_multibyte;
1925   EMACS_INT consumed_chars = 0;
1926   int c;
     /* Becomes CATEGORY_MASK_EMACS_MULE once a composition run or a
        complete multibyte sequence has been seen.  */
1927   int found = 0;
1928
1929   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1930   /* A coding system of this category is always ASCII compatible.  */
1931   src += coding->head_ascii;
1932
1933   while (1)
1934     {
1935       src_base = src;
1936       ONE_MORE_BYTE (c);
1937       if (c < 0)
1938         continue;
1939       if (c == 0x80)
1940         {
1941           /* Perhaps the start of composite character.  We simply skip
1942              it because analyzing it is too heavy for detecting.  But,
1943              at least, we check that the composite character
1944              consists of more than 4 bytes.  */
1945           const unsigned char *src_start;
1946
1947         repeat:
1948           src_start = src;
1949           do
1950             {
1951               ONE_MORE_BYTE (c);
1952             }
1953           while (c >= 0xA0);
1954
1955           if (src - src_start <= 4)
1956             break;
1957           found = CATEGORY_MASK_EMACS_MULE;
             /* The byte that ended the run starts another one.  */
1958           if (c == 0x80)
1959             goto repeat;
1960         }
1961
1962       if (c < 0x80)
1963         {
1964           if (c < 0x20
1965               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1966             break;
1967         }
1968       else
1969         {
             /* Number of bytes still expected after the leading
                byte C.  */
1970           int more_bytes = emacs_mule_bytes[c] - 1;
1971
1972           while (more_bytes > 0)
1973             {
1974               ONE_MORE_BYTE (c);
1975               if (c < 0xA0)
1976                 {
1977                   src--;        /* Unread the last byte.  */
1978                   break;
1979                 }
1980               more_bytes--;
1981             }
             /* A nonzero count means the sequence was cut short.  */
1982           if (more_bytes != 0)
1983             break;
1984           found = CATEGORY_MASK_EMACS_MULE;
1985         }
1986     }
     /* Reached only through the `break's above.  */
1987   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1988   return 0;
1989
1990  no_more_source:
1991   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1992     {
1993       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1994       return 0;
1995     }
1996   detect_info->found |= found;
1997   return 1;
1998 }
1999
2000
2001 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2002    character.  If CMP_STATUS indicates that we must expect MSEQ or
2003    RULE described above, decode it and return the negative value of
2004    the decoded character or rule.  If an invalid byte is found, return
2005    -1.  If SRC is too short, return -2.

   On a successful return *NBYTES is set to the number of source
   bytes read (src - src_base) and *NCHARS to CONSUMED_CHARS.  When a
   character (not a rule) is returned and ID is non-null, *ID is set
   to the charset ID used for decoding.  The outputs are left
   untouched on the -1 and -2 returns.

   NOTE(review): ONE_MORE_BYTE and CODING_DECODE_CHAR are defined
   elsewhere; ONE_MORE_BYTE presumably jumps to no_more_source when
   SRC reaches SRC_END -- confirm.  */
2006
2007 static int
2008 emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2009                  int *nbytes, int *nchars, int *id,
2010                  struct composition_status *cmp_status)
2011 {
2012   const unsigned char *src_end = coding->source + coding->src_bytes;
2013   const unsigned char *src_base = src;
2014   int multibytep = coding->src_multibyte;
2015   int charset_ID;
2016   unsigned code;
2017   int c;
2018   int consumed_chars = 0;
     /* Set when the sequence was read in the old-form MSEQ encoding;
        the result is then returned negated.  */
2019   int mseq_found = 0;
2020
2021   ONE_MORE_BYTE (c);
2022   if (c < 0)
2023     {
         /* A negative value is taken as an already decoded character;
            it is returned as is with the charset of table entry 0.  */
2024       c = -c;
2025       charset_ID = emacs_mule_charset[0];
2026     }
2027   else
2028     {
2029       if (c >= 0xA0)
2030         {
             /* A first byte >= 0xA0 is accepted only while reading an
                old-form composition.  */
2031           if (cmp_status->state != COMPOSING_NO
2032               && cmp_status->old_form)
2033             {
2034               if (cmp_status->state == COMPOSING_CHAR)
2035                 {
                     /* MSEQ: 0xA0 introduces an ASCII code stored with
                        0x80 added; any other byte is a leading code
                        stored with 0x20 added.  */
2036                   if (c == 0xA0)
2037                     {
2038                       ONE_MORE_BYTE (c);
2039                       c -= 0x80;
2040                       if (c < 0)
2041                         goto invalid_code;
2042                     }
2043                   else
2044                     c -= 0x20;
2045                   mseq_found = 1;
2046                 }
2047               else
2048                 {
                     /* Not expecting a character: hand the byte back
                        negated (a RULE, per the comment above).  */
2049                   *nbytes = src - src_base;
2050                   *nchars = consumed_chars;
2051                   return -c;
2052                 }
2053             }
2054           else
2055             goto invalid_code;
2056         }
2057
         /* Dispatch on the total sequence length for leading byte C.  */
2058       switch (emacs_mule_bytes[c])
2059         {
2060         case 2:
2061           if ((charset_ID = emacs_mule_charset[c]) < 0)
2062             goto invalid_code;
2063           ONE_MORE_BYTE (c);
2064           if (c < 0xA0)
2065             goto invalid_code;
2066           code = c & 0x7F;
2067           break;
2068
2069         case 3:
2070           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2071               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2072             {
                 /* Private leading code: the next byte selects the
                    charset and one code byte follows.  */
2073               ONE_MORE_BYTE (c);
2074               if (c < 0xA0 || (charset_ID = emacs_mule_charset[c]) < 0)
2075                 goto invalid_code;
2076               ONE_MORE_BYTE (c);
2077               if (c < 0xA0)
2078                 goto invalid_code;
2079               code = c & 0x7F;
2080             }
2081           else
2082             {
                 /* The leading byte selects the charset and two code
                    bytes follow.  */
2083               if ((charset_ID = emacs_mule_charset[c]) < 0)
2084                 goto invalid_code;
2085               ONE_MORE_BYTE (c);
2086               if (c < 0xA0)
2087                 goto invalid_code;
2088               code = (c & 0x7F) << 8;
2089               ONE_MORE_BYTE (c);
2090               if (c < 0xA0)
2091                 goto invalid_code;
2092               code |= c & 0x7F;
2093             }
2094           break;
2095
2096         case 4:
             /* The second byte selects the charset and two code bytes
                follow.  */
2097           ONE_MORE_BYTE (c);
2098           if (c < 0 || (charset_ID = emacs_mule_charset[c]) < 0)
2099             goto invalid_code;
2100           ONE_MORE_BYTE (c);
2101           if (c < 0xA0)
2102             goto invalid_code;
2103           code = (c & 0x7F) << 8;
2104           ONE_MORE_BYTE (c);
2105           if (c < 0xA0)
2106             goto invalid_code;
2107           code |= c & 0x7F;
2108           break;
2109
2110         case 1:
2111           code = c;
2112           charset_ID = ASCII_BYTE_P (code) ? charset_ascii : charset_eight_bit;
2113           break;
2114
2115         default:
2116           abort ();
2117         }
2118       CODING_DECODE_CHAR (coding, src, src_base, src_end,
2119                           CHARSET_FROM_ID (charset_ID), code, c);
2120       if (c < 0)
2121         goto invalid_code;
2122     }
2123   *nbytes = src - src_base;
2124   *nchars = consumed_chars;
2125   if (id)
2126     *id = charset_ID;
2127   return (mseq_found ? -c : c);
2128
2129  no_more_source:
2130   return -2;
2131
2132  invalid_code:
2133   return -1;
2134 }
2135
2136
2137 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2138
2139 /* Handle these composition sequence ('|': the end of header elements,
2140    BYTES and CHARS >= 0xA0):
2141
2142    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2143    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2144    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2145
2146    and these old form:
2147
2148    (4) relative composition: 0x80 | MSEQ ... MSEQ
2149    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2150
2151    When the starter 0x80 and the following header elements are found,
2152    this annotation header is produced.
2153
2154         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2155
2156    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2157    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2158
2159    Then, upon reading the following elements, these codes are produced
2160    until the composition end is found:
2161
2162    (1) CHAR ... CHAR
2163    (2) ALT ... ALT CHAR ... CHAR
2164    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2165    (4) CHAR ... CHAR
2166    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2167
2168    When the composition end is found, LENGTH and NCHARS in the
2169    annotation header is updated as below:
2170
2171    (1) LENGTH: unchanged, NCHARS: unchanged
2172    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2173    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2174    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2175    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2176
2177    If an error is found while composing, the annotation header is
2178    changed to the original composition header (plus filler -1s) as
2179    below:
2180
2181    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2182    (5)          [ 0x80 0xFF -1 -1 -1 ]
2183
2184    and the sequence [ -2 DECODED-RULE ] is changed to the original
2185    byte sequence as below:
2186         o the original byte sequence is B: [ B -1 ]
2187         o the original byte sequence is B1 B2: [ B1 B2 ]
2188
2189    Most of the routines are implemented by macros because many
2190    variables and labels in the caller decode_coding_emacs_mule must be
2191    accessible, and they are usually called just once (thus don't
2192    increase the size of the compiled object).  */
2193
2194 /* Decode a composition rule represented by C as a component of
2195    composition sequence of Emacs 20 style.  Set RULE to the decoded
2196    rule.

   C is modified in place: 0xA0 is subtracted and the result must be
   in the range 0..80, otherwise control jumps to the caller's
   `invalid_code' label.  The result is split into GREF = C / 9 and
   NREF = C % 9, a value of 4 in either is replaced by 10, and RULE
   is set to COMPOSITION_ENCODE_RULE (GREF, NREF).  */
2197
2198 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
2199   do {                                                  \
2200     int gref, nref;                                     \
2201                                                         \
2202     c -= 0xA0;                                          \
2203     if (c < 0 || c >= 81)                               \
2204       goto invalid_code;                                \
2205     gref = c / 9, nref = c % 9;                         \
2206     if (gref == 4) gref = 10;                           \
2207     if (nref == 4) nref = 10;                           \
2208     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2209   } while (0)
2210
2211
2212 /* Decode a composition rule represented by C and the following byte
2213    at SRC as a component of composition sequence of Emacs 21 style.
2214    Set RULE to the decoded rule.

   GREF is C - 0x20.  The next byte is then read into C with
   ONE_MORE_BYTE, and NREF is that byte - 0x20.  Each must be in the
   range 0..80, otherwise control jumps to the caller's
   `invalid_code' label.  RULE is set to
   COMPOSITION_ENCODE_RULE (GREF, NREF).  */
2215
2216 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
2217   do {                                                  \
2218     int gref, nref;                                     \
2219                                                         \
2220     gref = c - 0x20;                                    \
2221     if (gref < 0 || gref >= 81)                         \
2222       goto invalid_code;                                \
2223     ONE_MORE_BYTE (c);                                  \
2224     nref = c - 0x20;                                    \
2225     if (nref < 0 || nref >= 81)                         \
2226       goto invalid_code;                                \
2227     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2228   } while (0)
2229
2230
2231 /* Start of Emacs 21 style format.  On entry C holds the method byte
2232    (0xF2 + METHOD); the next two bytes read from SRC are 0xA0 + BYTES
2233    and 0xA0 + CHARS, where BYTES is the byte length of this composition
2234    information and CHARS is the number of characters composed by it.

   Control jumps to the caller's `invalid_code' label if the BYTES
   byte is negative, if BYTES is less than 3, if the method is
   COMPOSITION_RELATIVE and BYTES is not 4, or if CHARS is not in the
   range 1..MAX_COMPOSITION_COMPONENTS - 1.

   Otherwise CMP_STATUS is set up as a new-form composition
   (old_form = 0) with the given method, in state COMPOSING_CHAR for
   COMPOSITION_RELATIVE and COMPOSING_COMPONENT_CHAR for the other
   methods, with length MAX_ANNOTATION_LENGTH, nchars CHARS and
   ncomps BYTES - 4.  ADD_COMPOSITION_DATA is then called with
   CHARBUF, CHARS, BYTES and the method.  */
2235
2236 #define DECODE_EMACS_MULE_21_COMPOSITION()                              \
2237   do {                                                                  \
2238     enum composition_method method = c - 0xF2;                          \
2239     int nbytes, nchars;                                                 \
2240                                                                         \
2241     ONE_MORE_BYTE (c);                                                  \
2242     if (c < 0)                                                          \
2243       goto invalid_code;                                                \
2244     nbytes = c - 0xA0;                                                  \
2245     if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4))  \
2246       goto invalid_code;                                                \
2247     ONE_MORE_BYTE (c);                                                  \
2248     nchars = c - 0xA0;                                                  \
2249     if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
2250       goto invalid_code;                                                \
2251     cmp_status->old_form = 0;                                           \
2252     cmp_status->method = method;                                        \
2253     if (method == COMPOSITION_RELATIVE)                                 \
2254       cmp_status->state = COMPOSING_CHAR;                               \
2255     else                                                                \
2256       cmp_status->state = COMPOSING_COMPONENT_CHAR;                     \
2257     cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
2258     cmp_status->nchars = nchars;                                        \
2259     cmp_status->ncomps = nbytes - 4;                                    \
2260     ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
2261   } while (0)
2262
2263
2264 /* Start of Emacs 20 style format for relative composition.

   Set up CMP_STATUS as an old-form composition (old_form = 1) with
   method COMPOSITION_RELATIVE, state COMPOSING_CHAR, length
   MAX_ANNOTATION_LENGTH and zero nchars and ncomps, then call
   ADD_COMPOSITION_DATA with CHARBUF, 0, 0 and that method.  */
2265
2266 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
2267   do {                                                          \
2268     cmp_status->old_form = 1;                                   \
2269     cmp_status->method = COMPOSITION_RELATIVE;                  \
2270     cmp_status->state = COMPOSING_CHAR;                         \
2271     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2272     cmp_status->nchars = cmp_status->ncomps = 0;                \
2273     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2274   } while (0)
2275
2276
2277 /* Start of Emacs 20 style format for rule-base composition.

   Identical to DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION except that
   the method recorded in CMP_STATUS and passed to
   ADD_COMPOSITION_DATA is COMPOSITION_WITH_RULE.  */
2278
2279 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
2280   do {                                                          \
2281     cmp_status->old_form = 1;                                   \
2282     cmp_status->method = COMPOSITION_WITH_RULE;                 \
2283     cmp_status->state = COMPOSING_CHAR;                         \
2284     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2285     cmp_status->nchars = cmp_status->ncomps = 0;                \
2286     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2287   } while (0)
2288
2289
/* Read the next byte into C and start the kind of composition it
   announces (presumably invoked once the 0x80 starter has been read
   -- confirm against the caller):

     0xF2 + COMPOSITION_RELATIVE .. 0xF2 + COMPOSITION_WITH_RULE_ALTCHARS
               -> Emacs 21 style (DECODE_EMACS_MULE_21_COMPOSITION)
     0xA0 .. 0xBF
               -> Emacs 20 style relative composition; SRC is reset so
                  that the byte is read again as a component
     0xFF      -> Emacs 20 style rule-base composition

   A negative byte, a byte below 0xA0, or any other value makes
   control jump to the caller's `invalid_code' label.  */
2290 #define DECODE_EMACS_MULE_COMPOSITION_START()           \
2291   do {                                                  \
2292     const unsigned char *current_src = src;             \
2293                                                         \
2294     ONE_MORE_BYTE (c);                                  \
2295     if (c < 0)                                          \
2296       goto invalid_code;                                \
2297     if (c - 0xF2 >= COMPOSITION_RELATIVE                \
2298         && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
2299       DECODE_EMACS_MULE_21_COMPOSITION ();              \
2300     else if (c < 0xA0)                                  \
2301       goto invalid_code;                                \
2302     else if (c < 0xC0)                                  \
2303       {                                                 \
2304         DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
2305         /* Re-read C as a composition component.  */    \
2306         src = current_src;                              \
2307       }                                                 \
2308     else if (c == 0xFF)                                 \
2309       DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();     \
2310     else                                                \
2311       goto invalid_code;                                \
2312   } while (0)
2313
/* Close the composition described by CMP_STATUS.  IDX is
   -CMP_STATUS->length, so CHARBUF[IDX] is the element that many
   positions before CHARBUF (the start of the annotation header,
   going by the layout comment above).  For an old-form composition
   CHARBUF[IDX + 2] is set to CMP_STATUS->nchars; for a new-form
   composition whose method is greater than COMPOSITION_RELATIVE,
   CHARBUF[IDX] is set to CHARBUF[IDX + 2] - CMP_STATUS->length.
   In every case the state becomes COMPOSING_NO.  */
2314 #define EMACS_MULE_COMPOSITION_END()                            \
2315   do {                                                          \
2316     int idx = - cmp_status->length;                             \
2317                                                                 \
2318     if (cmp_status->old_form)                                   \
2319       charbuf[idx + 2] = cmp_status->nchars;                    \
2320     else if (cmp_status->method > COMPOSITION_RELATIVE)         \
2321       charbuf[idx] = charbuf[idx + 2] - cmp_status->length;     \
2322     cmp_status->state = COMPOSING_NO;                           \
2323   } while (0)
2324
2325
     /* Terminate the composition described by CMP_STATUS.  The data of
        that composition occupies the CMP_STATUS->length elements that
        end just before CHARBUF.  Return a count that
        EMACS_MULE_MAYBE_FINISH_COMPOSITION adds to the caller's
        `char_offset'.

        If the composition is in the old form and CMP_STATUS->nchars is
        positive, the element at offset 2 of the data is set to that
        character count.  In addition, when the method is
        COMPOSITION_WITH_RULE and the state is COMPOSING_CHAR, the last
        two elements are replaced with an eight-bit character made from
        the last element plus 0xA0, followed by -1; the return value is
        then 1, otherwise 0.

        In every other case the beginning of the data is overwritten
        with eight-bit characters: 0x80 first, then either 0xFF followed
        by the elements -3 and 0 (method COMPOSITION_WITH_RULE, return
        value 1), or 0xF2 plus the method, then two bytes rebuilt from
        stored elements plus 0xA0, then -1 (return value 4).

        CMP_STATUS->state is always reset to COMPOSING_NO.  */
2326 static int
2327 emacs_mule_finish_composition (int *charbuf,
2328                                struct composition_status *cmp_status)
2329 {
     /* Negative offset from CHARBUF to the first element of the
        composition data.  */
2330   int idx = - cmp_status->length;
2331   int new_chars;
2332
2333   if (cmp_status->old_form && cmp_status->nchars > 0)
2334     {
2335       charbuf[idx + 2] = cmp_status->nchars;
2336       new_chars = 0;
2337       if (cmp_status->method == COMPOSITION_WITH_RULE
2338           && cmp_status->state == COMPOSING_CHAR)
2339         {
2340           /* The last rule was invalid.  */
2341           int rule = charbuf[-1] + 0xA0;
2342
2343           charbuf[-2] = BYTE8_TO_CHAR (rule);
2344           charbuf[-1] = -1;
2345           new_chars = 1;
2346         }
2347     }
2348   else
2349     {
2350       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2351
2352       if (cmp_status->method == COMPOSITION_WITH_RULE)
2353         {
2354           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2355           charbuf[idx++] = -3;
2356           charbuf[idx++] = 0;
2357           new_chars = 1;
2358         }
2359       else
2360         {
     /* Read both stored values before the stores below overwrite the
        slots they come from.  */
2361           int nchars = charbuf[idx + 1] + 0xA0;
2362           int nbytes = charbuf[idx + 2] + 0xA0;
2363
2364           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2365           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2366           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2367           charbuf[idx++] = -1;
2368           new_chars = 4;
2369         }
2370     }
2371   cmp_status->state = COMPOSING_NO;
2372   return new_chars;
2373 }
2374
     /* If a composition is in progress (cmp_status->state is not
        COMPOSING_NO), terminate it with emacs_mule_finish_composition
        and add that function's return value to `char_offset'.  This
        macro uses the variables `charbuf', `cmp_status' and
        `char_offset' of the expansion site.  */
2375 #define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
2376   do {                                                                    \
2377     if (cmp_status->state != COMPOSING_NO)                                \
2378       char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
2379   } while (0)
2380
2381
     /* Decode emacs-mule encoded source bytes of CODING into its charbuf.

        Reading starts at CODING->source + CODING->consumed and stops at
        CODING->source + CODING->src_bytes; decoded elements are appended
        at CODING->charbuf + CODING->charbuf_used.  The loop ends when
        the output pointer reaches CHARBUF_END (which keeps room for
        annotations), when emacs_mule_char returns -2, or at the label
        `no_more_source' (no goto to that label is visible in this
        function; presumably ONE_MORE_BYTE jumps there when the source
        is exhausted -- TODO confirm).

        The composition state is kept in
        CODING->spec.emacs_mule.cmp_status.  A composition that is still
        open when the function leaves is finished if
        CODING_MODE_LAST_BLOCK is set; otherwise its elements are saved
        in cmp_status->carryover and restored at the start of the next
        call.

        An invalid sequence is handled at the label `invalid_code': the
        first byte of the sequence is stored as a single character and
        CODING->errors is incremented.

        On exit CODING->consumed_char, CODING->consumed and
        CODING->charbuf_used are updated.  */
2382 static void
2383 decode_coding_emacs_mule (struct coding_system *coding)
2384 {
2385   const unsigned char *src = coding->source + coding->consumed;
2386   const unsigned char *src_end = coding->source + coding->src_bytes;
2387   const unsigned char *src_base;
2388   int *charbuf = coding->charbuf + coding->charbuf_used;
2389   /* We may produce two annotations (charset and composition) in one
2390      loop and one more charset annotation at the end.  */
2391   int *charbuf_end
2392     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3)
2393       /* We can produce up to 2 characters in a loop.  */
2394       - 1;
2395   EMACS_INT consumed_chars = 0, consumed_chars_base;
     /* `multibytep' is not referenced directly below; presumably the
        byte-reading macros use it -- TODO confirm.  */
2396   int multibytep = coding->src_multibyte;
2397   EMACS_INT char_offset = coding->produced_char;
2398   EMACS_INT last_offset = char_offset;
2399   int last_id = charset_ascii;
2400   int eol_dos =
2401     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2402   int byte_after_cr = -1;
2403   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2404
     /* A composition was left open by the previous call: put its saved
        elements back at the head of the output.  */
2405   if (cmp_status->state != COMPOSING_NO)
2406     {
2407       int i;
2408
2409       if (charbuf_end - charbuf < cmp_status->length)
2410         abort ();
2411       for (i = 0; i < cmp_status->length; i++)
2412         *charbuf++ = cmp_status->carryover[i];
2413       coding->annotated = 1;
2414     }
2415
2416   while (1)
2417     {
2418       int c, id IF_LINT (= 0);
2419
     /* Remember where this iteration starts; the values reported at
        the end of the function are taken from these base variables.  */
2420       src_base = src;
2421       consumed_chars_base = consumed_chars;
2422
2423       if (charbuf >= charbuf_end)
2424         {
     /* A byte was already read ahead after a CR; back up so that it
        is not reported as consumed.  */
2425           if (byte_after_cr >= 0)
2426             src_base--;
2427           break;
2428         }
2429
2430       if (byte_after_cr >= 0)
2431         c = byte_after_cr, byte_after_cr = -1;
2432       else
2433         ONE_MORE_BYTE (c);
2434
2435       if (c < 0 || c == 0x80)
2436         {
2437           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2438           if (c < 0)
2439             {
2440               *charbuf++ = -c;
2441               char_offset++;
2442             }
2443           else
2444             DECODE_EMACS_MULE_COMPOSITION_START ();
2445           continue;
2446         }
2447
2448       if (c < 0x80)
2449         {
2450           if (eol_dos && c == '\r')
2451             ONE_MORE_BYTE (byte_after_cr);
2452           id = charset_ascii;
2453           if (cmp_status->state != COMPOSING_NO)
2454             {
2455               if (cmp_status->old_form)
2456                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2457               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2458                 cmp_status->ncomps--;
2459             }
2460         }
2461       else
2462         {
2463           int nchars IF_LINT (= 0), nbytes IF_LINT (= 0);
2464           /* emacs_mule_char can load a charset map from a file, which
2465              allocates a large structure and might cause buffer text
2466              to be relocated as result.  Thus, we need to remember the
2467              original pointer to buffer text, and fix up all related
2468              pointers after the call.  */
2469           const unsigned char *orig = coding->source;
2470           EMACS_INT offset;
2471
2472           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2473                                cmp_status);
2474           offset = coding->source - orig;
2475           if (offset)
2476             {
2477               src += offset;
2478               src_base += offset;
2479               src_end += offset;
2480             }
2481           if (c < 0)
2482             {
2483               if (c == -1)
2484                 goto invalid_code;
2485               if (c == -2)
2486                 break;
2487             }
2488           src = src_base + nbytes;
2489           consumed_chars = consumed_chars_base + nchars;
2490           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2491             cmp_status->ncomps -= nchars;
2492         }
2493
2494       /* Now if C >= 0, we found a normally encoded character, if C <
2495          0, we found an old-style composition component character or
2496          rule.  */
2497
2498       if (cmp_status->state == COMPOSING_NO)
2499         {
     /* The charset changed: close the run of the previous charset
        with a charset annotation unless that charset was ASCII.  */
2500           if (last_id != id)
2501             {
2502               if (last_id != charset_ascii)
2503                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2504                                   last_id);
2505               last_id = id;
2506               last_offset = char_offset;
2507             }
2508           *charbuf++ = c;
2509           char_offset++;
2510         }
2511       else if (cmp_status->state == COMPOSING_CHAR)
2512         {
2513           if (cmp_status->old_form)
2514             {
2515               if (c >= 0)
2516                 {
2517                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2518                   *charbuf++ = c;
2519                   char_offset++;
2520                 }
2521               else
2522                 {
2523                   *charbuf++ = -c;
2524                   cmp_status->nchars++;
2525                   cmp_status->length++;
2526                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2527                     EMACS_MULE_COMPOSITION_END ();
2528                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2529                     cmp_status->state = COMPOSING_RULE;
2530                 }
2531             }
2532           else
2533             {
2534               *charbuf++ = c;
2535               cmp_status->length++;
2536               cmp_status->nchars--;
2537               if (cmp_status->nchars == 0)
2538                 EMACS_MULE_COMPOSITION_END ();
2539             }
2540         }
2541       else if (cmp_status->state == COMPOSING_RULE)
2542         {
2543           int rule;
2544
2545           if (c >= 0)
2546             {
2547               EMACS_MULE_COMPOSITION_END ();
2548               *charbuf++ = c;
2549               char_offset++;
2550             }
2551           else
2552             {
2553               c = -c;
2554               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2555               if (rule < 0)
2556                 goto invalid_code;
2557               *charbuf++ = -2;
2558               *charbuf++ = rule;
2559               cmp_status->length += 2;
2560               cmp_status->state = COMPOSING_CHAR;
2561             }
2562         }
2563       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2564         {
2565           *charbuf++ = c;
2566           cmp_status->length++;
2567           if (cmp_status->ncomps == 0)
2568             cmp_status->state = COMPOSING_CHAR;
2569           else if (cmp_status->ncomps > 0)
2570             {
2571               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2572                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2573             }
2574           else
2575             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2576         }
2577       else                      /* COMPOSING_COMPONENT_RULE */
2578         {
2579           int rule;
2580
2581           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2582           if (rule < 0)
2583             goto invalid_code;
2584           *charbuf++ = -2;
2585           *charbuf++ = rule;
2586           cmp_status->length += 2;
2587           cmp_status->ncomps--;
2588           if (cmp_status->ncomps > 0)
2589             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2590           else
2591             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2592         }
2593       continue;
2594
2595     invalid_code:
     /* Rewind to the start of the offending sequence and store its
        first byte as a character by itself; the loop then resumes at
        the following byte.  */
2596       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2597       src = src_base;
2598       consumed_chars = consumed_chars_base;
2599       ONE_MORE_BYTE (c);
2600       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2601       char_offset++;
2602       coding->errors++;
2603     }
2604
2605  no_more_source:
2606   if (cmp_status->state != COMPOSING_NO)
2607     {
2608       if (coding->mode & CODING_MODE_LAST_BLOCK)
2609         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2610       else
2611         {
2612           int i;
2613
     /* Not the last block: take the unfinished composition out of the
        output and save it for the next call.  */
2614           charbuf -= cmp_status->length;
2615           for (i = 0; i < cmp_status->length; i++)
2616             cmp_status->carryover[i] = charbuf[i];
2617         }
2618     }
2619   if (last_id != charset_ascii)
2620     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2621   coding->consumed_char += consumed_chars_base;
2622   coding->consumed = src_base - coding->source;
2623   coding->charbuf_used = charbuf - coding->charbuf;
2624 }
2625
2626
     /* Store in CODES (an array of at least two elements) the leading
        codes for the charset whose emacs-mule id is ID.  An id below
        0xA0 is its own leading code and CODES[1] is set to 0.  A larger
        id gets the first leading code 0x9A, 0x9B, 0x9C or 0x9D, chosen
        by the range the id falls in, and CODES[1] is set to the id.

        ID and CODES are evaluated more than once, so they must not have
        side effects.  The expansion is a single statement without a
        trailing semicolon; write the semicolon at the place of use, so
        that the macro also works as the body of an unbraced `if' that
        has an `else'.  */
2627 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2628   do {                                          \
2629     if ((id) < 0xA0)                            \
2630       (codes)[0] = (id), (codes)[1] = 0;        \
2631     else if ((id) < 0xE0)                       \
2632       (codes)[0] = 0x9A, (codes)[1] = (id);     \
2633     else if ((id) < 0xF0)                       \
2634       (codes)[0] = 0x9B, (codes)[1] = (id);     \
2635     else if ((id) < 0xF5)                       \
2636       (codes)[0] = 0x9C, (codes)[1] = (id);     \
2637     else                                        \
2638       (codes)[0] = 0x9D, (codes)[1] = (id);     \
2639   } while (0)
2640
2641
     /* Encode the elements of CODING->charbuf (CODING->charbuf_used of
        them) in emacs-mule format, writing bytes at CODING->destination
        + CODING->produced.

        A negative element C starts an annotation that occupies -C
        elements.  A charset annotation selects a preferred charset,
        which is used only if it is a member of the charset list; a
        composition annotation is skipped (not yet implemented).

        An ASCII character is emitted as one byte.  An eight-bit
        character is emitted as its raw byte.  For any other character
        a charset and code point are looked up, then the leading code(s)
        of the charset are emitted, followed by one or two code bytes
        with the high bit set, depending on the charset dimension.  If
        no charset is found, CODING->default_char is encoded instead.

        The charset list of the coding system is forced to
        Vemacs_mule_charset_list.  At the end of the loop the function
        records CODING_RESULT_SUCCESS, updates CODING->produced_char and
        CODING->produced, and returns 0.  ASSURE_DESTINATION is not
        visible here; presumably it leaves the loop early when the
        destination is full -- TODO confirm.  */
2642 static int
2643 encode_coding_emacs_mule (struct coding_system *coding)
2644 {
     /* `multibytep' and `produced_chars' are not modified directly
        below; presumably the EMIT_* macros use them -- TODO confirm.  */
2645   int multibytep = coding->dst_multibyte;
2646   int *charbuf = coding->charbuf;
2647   int *charbuf_end = charbuf + coding->charbuf_used;
2648   unsigned char *dst = coding->destination + coding->produced;
2649   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2650   int safe_room = 8;
2651   EMACS_INT produced_chars = 0;
2652   Lisp_Object attrs, charset_list;
2653   int c;
2654   int preferred_charset_id = -1;
2655
2656   CODING_GET_INFO (coding, attrs, charset_list);
2657   if (! EQ (charset_list, Vemacs_mule_charset_list))
2658     {
2659       CODING_ATTR_CHARSET_LIST (attrs)
2660         = charset_list = Vemacs_mule_charset_list;
2661     }
2662
2663   while (charbuf < charbuf_end)
2664     {
2665       ASSURE_DESTINATION (safe_room);
2666       c = *charbuf++;
2667
2668       if (c < 0)
2669         {
2670           /* Handle an annotation.  */
2671           switch (*charbuf)
2672             {
2673             case CODING_ANNOTATE_COMPOSITION_MASK:
2674               /* Not yet implemented.  */
2675               break;
2676             case CODING_ANNOTATE_CHARSET_MASK:
     /* NOTE(review): CHARBUF already points one element past the
        annotation length here; confirm that index 3 matches the layout
        written by ADD_CHARSET_DATA, which is defined outside this
        section.  */
2677               preferred_charset_id = charbuf[3];
2678               if (preferred_charset_id >= 0
2679                   && NILP (Fmemq (make_number (preferred_charset_id),
2680                                   charset_list)))
2681                 preferred_charset_id = -1;
2682               break;
2683             default:
2684               abort ();
2685             }
     /* Skip the rest of the annotation; its first element has already
        been consumed above.  */
2686           charbuf += -c - 1;
2687           continue;
2688         }
2689
2690       if (ASCII_CHAR_P (c))
2691         EMIT_ONE_ASCII_BYTE (c);
2692       else if (CHAR_BYTE8_P (c))
2693         {
2694           c = CHAR_TO_BYTE8 (c);
2695           EMIT_ONE_BYTE (c);
2696         }
2697       else
2698         {
2699           struct charset *charset;
2700           unsigned code;
2701           int dimension;
2702           int emacs_mule_id;
2703           unsigned char leading_codes[2];
2704
2705           if (preferred_charset_id >= 0)
2706             {
2707               int result;
2708
2709               charset = CHARSET_FROM_ID (preferred_charset_id);
2710               CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
2711               if (result)
2712                 code = ENCODE_CHAR (charset, c);
2713               else
2714                 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2715                                      &code, charset);
2716             }
2717           else
2718             CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2719                                  &code, charset);
2720           if (! charset)
2721             {
2722               c = coding->default_char;
2723               if (ASCII_CHAR_P (c))
2724                 {
2725                   EMIT_ONE_ASCII_BYTE (c);
2726                   continue;
2727                 }
     /* NOTE(review): CHARSET is not checked again after this second
        lookup; confirm that it cannot be null for the default
        character.  */
2728               CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
2729                                    &code, charset);
2730             }
2731           dimension = CHARSET_DIMENSION (charset);
2732           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2733           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2734           EMIT_ONE_BYTE (leading_codes[0]);
2735           if (leading_codes[1])
2736             EMIT_ONE_BYTE (leading_codes[1]);
2737           if (dimension == 1)
2738             EMIT_ONE_BYTE (code | 0x80);
2739           else
2740             {
2741               code |= 0x8080;
2742               EMIT_ONE_BYTE (code >> 8);
2743               EMIT_ONE_BYTE (code & 0xFF);
2744             }
2745         }
2746     }
2747   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2748   coding->produced_char += produced_chars;
2749   coding->produced = dst - coding->destination;
2750   return 0;
2751 }
2752
2753 \f
2754 /*** 7. ISO2022 handlers ***/
2755
2756 /* The following note describes the coding system ISO2022 briefly.
2757    Since the intention of this note is to help understand the
2758    functions in this file, some parts are NOT ACCURATE or are OVERLY
2759    SIMPLIFIED.  For thorough understanding, please refer to the
2760    original document of ISO2022.  This is equivalent to the standard
2761    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2762
2763    ISO2022 provides many mechanisms to encode several character sets
2764    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2765    is encoded using bytes less than 128.  This may make the encoded
2766    text a little bit longer, but the text passes more easily through
2767    several types of gateway, some of which strip off the MSB (Most
2768    Significant Bit).
2769
2770    There are two kinds of character sets: control character sets and
2771    graphic character sets.  The former contain control characters such
2772    as `newline' and `escape' to provide control functions (control
2773    functions are also provided by escape sequences).  The latter
2774    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2775    two control character sets and many graphic character sets.
2776
2777    Graphic character sets are classified into one of the following
2778    four classes, according to the number of bytes (DIMENSION) and
2779    number of characters in one dimension (CHARS) of the set:
2780    - DIMENSION1_CHARS94
2781    - DIMENSION1_CHARS96
2782    - DIMENSION2_CHARS94
2783    - DIMENSION2_CHARS96
2784
2785    In addition, each character set is assigned an identification tag,
2786    unique for each set, called the "final character" (denoted as <F>
2787    hereafter).  The <F> of each character set is decided by ECMA(*)
2788    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2789    (0x30..0x3F are for private use only).
2790
2791    Note (*): ECMA = European Computer Manufacturers Association
2792
2793    Here are examples of graphic character sets [NAME(<F>)]:
2794         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2795         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2796         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2797         o DIMENSION2_CHARS96 -- none for the moment
2798
2799    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2800         C0 [0x00..0x1F] -- control character plane 0
2801         GL [0x20..0x7F] -- graphic character plane 0
2802         C1 [0x80..0x9F] -- control character plane 1
2803         GR [0xA0..0xFF] -- graphic character plane 1
2804
2805    A control character set is directly designated and invoked to C0 or
2806    C1 by an escape sequence.  The most common case is that:
2807    - ISO646's  control character set is designated/invoked to C0, and
2808    - ISO6429's control character set is designated/invoked to C1,
2809    and usually these designations/invocations are omitted in encoded
2810    text.  In a 7-bit environment, only C0 can be used, and a control
2811    character for C1 is encoded by an appropriate escape sequence to
2812    fit into the environment.  All control characters for C1 are
2813    defined to have corresponding escape sequences.
2814
2815    A graphic character set is at first designated to one of four
2816    graphic registers (G0 through G3), then these graphic registers are
2817    invoked to GL or GR.  These designations and invocations can be
2818    done independently.  The most common case is that G0 is invoked to
2819    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2820    these invocations and designations are omitted in encoded text.
2821    In a 7-bit environment, only GL can be used.
2822
2823    When a graphic character set of CHARS94 is invoked to GL, codes
2824    0x20 and 0x7F of the GL area work as control characters SPACE and
2825    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2826    be used.
2827
2828    There are two ways of invocation: locking-shift and single-shift.
2829    With locking-shift, the invocation lasts until the next different
2830    invocation, whereas with single-shift, the invocation affects the
2831    following character only and doesn't affect the locking-shift
2832    state.  Invocations are done by the following control characters or
2833    escape sequences:
2834
2835    ----------------------------------------------------------------------
2836    abbrev  function                  cntrl escape seq   description
2837    ----------------------------------------------------------------------
2838    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2839    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2840    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2841    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2842    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2843    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2844    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2845    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2846    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2847    ----------------------------------------------------------------------
2848    (*) These are not used by any known coding system.
2849
2850    Control characters for these functions are defined by macros
2851    ISO_CODE_XXX in `coding.h'.
2852
2853    Designations are done by the following escape sequences:
2854    ----------------------------------------------------------------------
2855    escape sequence      description
2856    ----------------------------------------------------------------------
2857    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2858    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2859    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2860    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2861    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2862    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2863    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2864    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2865    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2866    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2867    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2868    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2869    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2870    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2871    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2872    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2873    ----------------------------------------------------------------------
2874
2875    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2876    of dimension 1, chars 94, and final character <F>, etc...
2877
2878    Note (*): Although these designations are not allowed in ISO2022,
2879    Emacs accepts them on decoding, and produces them on encoding
2880    CHARS96 character sets in a coding system which is characterized as
2881    7-bit environment, non-locking-shift, and non-single-shift.
2882
2883    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2884    '(' must be omitted.  We refer to this as "short-form" hereafter.
2885
2886    Now you may notice that there are a lot of ways of encoding the
2887    same multilingual text in ISO2022.  Actually, there exist many
2888    coding systems such as Compound Text (used in X11's inter-client
2889    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2890    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2891    localized platforms), and all of these are variants of ISO2022.
2892
2893    In addition to the above, Emacs handles two more kinds of escape
2894    sequences: ISO6429's direction specification and Emacs' private
2895    sequence for specifying character composition.
2896
2897    ISO6429's direction specification takes the following form:
2898         o CSI ']'      -- end of the current direction
2899         o CSI '0' ']'  -- end of the current direction
2900         o CSI '1' ']'  -- start of left-to-right text
2901         o CSI '2' ']'  -- start of right-to-left text
2902    The control character CSI (0x9B: control sequence introducer) is
2903    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2904
2905    Character composition specification takes the following form:
2906         o ESC '0' -- start relative composition
2907         o ESC '1' -- end composition
2908         o ESC '2' -- start rule-base composition (*)
2909         o ESC '3' -- start relative composition with alternate chars  (**)
2910         o ESC '4' -- start rule-base composition with alternate chars  (**)
2911   Since these are not standard escape sequences of any ISO standard,
2912   the use of them with these meanings is restricted to Emacs only.
2913
2914   (*) This form is used only in Emacs 20.7 and older versions,
2915   but newer versions can safely decode it.
2916   (**) This form is used only in Emacs 21.1 and newer versions,
2917   and older versions can't decode it.
2918
2919   Here's a list of example usages of these composition escape
2920   sequences (categorized by `enum composition_method').
2921
2922   COMPOSITION_RELATIVE:
2923         ESC 0 CHAR [ CHAR ] ESC 1
2924   COMPOSITION_WITH_RULE:
2925         ESC 2 CHAR [ RULE CHAR ] ESC 1
2926   COMPOSITION_WITH_ALTCHARS:
2927         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2928   COMPOSITION_WITH_RULE_ALTCHARS:
2929         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2930
     /* Table holding one `enum iso_code_class_type' value for each of
        256 index values -- presumably indexed by byte value; the code
        that fills and reads it is outside this section (TODO
        confirm).  */
2931 static enum iso_code_class_type iso_code_class[256];
2932
     /* Nonzero if the charset ID has an entry in the safe_charsets
        table of CODING, i.e. ID is not greater than
        CODING->max_charset_id and the table entry for ID is not 255.
        setup_iso_safe_charsets initializes every entry to 255 before
        assigning the entries of the listed charsets.  ID is evaluated
        twice.
        NOTE(review): there is no lower bound check, so a negative ID
        would index before the table -- confirm that callers never pass
        one.  */
2933 #define SAFE_CHARSET_P(coding, id)      \
2934   ((id) <= (coding)->max_charset_id     \
2935    && (coding)->safe_charsets[id] != 255)
2936
     /* Build the safe-charsets table of the coding system whose
        attribute vector is ATTRS and store it in the
        coding_attr_safe_charsets slot of ATTRS.

        The table is a string with one byte for each charset id from 0
        to the largest id in the charset list.  The byte for a listed
        charset is the value associated with the id in the
        coding_attr_iso_request alist if there is one; otherwise it is
        the car (for a charset without iso_chars_96) or the cdr (for a
        charset with iso_chars_96) of the coding_attr_iso_usage pair,
        provided that value is less than 4.  All other bytes are 255.
        The stored values are presumably graphic register numbers
        (TODO confirm).

        If the coding system has CODING_ISO_FLAG_FULL_SUPPORT and its
        charset list is not Viso_2022_charset_list, the list is first
        replaced with Viso_2022_charset_list and any existing table is
        discarded.  If a table (a string) is already present after that
        step, nothing is done.  */
2937 static void
2938 setup_iso_safe_charsets (Lisp_Object attrs)
2939 {
2940   Lisp_Object charset_list, safe_charsets;
2941   Lisp_Object request;
2942   Lisp_Object reg_usage;
2943   Lisp_Object tail;
2944   int reg94, reg96;
2945   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2946   int max_charset_id;
2947
2948   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2949   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2950       && ! EQ (charset_list, Viso_2022_charset_list))
2951     {
2952       CODING_ATTR_CHARSET_LIST (attrs)
2953         = charset_list = Viso_2022_charset_list;
2954       ASET (attrs, coding_attr_safe_charsets, Qnil);
2955     }
2956
     /* A table has already been built for this charset list.  */
2957   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2958     return;
2959
     /* The largest charset id in the list determines the table size.  */
2960   max_charset_id = 0;
2961   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2962     {
2963       int id = XINT (XCAR (tail));
2964       if (max_charset_id < id)
2965         max_charset_id = id;
2966     }
2967
2968   safe_charsets = make_uninit_string (max_charset_id + 1);
2969   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
2970   request = AREF (attrs, coding_attr_iso_request);
2971   reg_usage = AREF (attrs, coding_attr_iso_usage);
2972   reg94 = XINT (XCAR (reg_usage));
2973   reg96 = XINT (XCDR (reg_usage));
2974
2975   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2976     {
2977       Lisp_Object id;
2978       Lisp_Object reg;
2979       struct charset *charset;
2980
2981       id = XCAR (tail);
2982       charset = CHARSET_FROM_ID (XINT (id));
     /* An explicit entry in the request alist takes precedence over the
        default values taken from the usage pair.  */
2983       reg = Fcdr (Fassq (id, request));
2984       if (! NILP (reg))
2985         SSET (safe_charsets, XINT (id), XINT (reg));
2986       else if (charset->iso_chars_96)
2987         {
2988           if (reg96 < 4)
2989             SSET (safe_charsets, XINT (id), reg96);
2990         }
2991       else
2992         {
2993           if (reg94 < 4)
2994             SSET (safe_charsets, XINT (id), reg94);
2995         }
2996     }
2997   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2998 }
2999
3000
3001 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3002    Check if a text is encoded in one of ISO-2022 based coding systems.
3003    If it is, return 1, else return 0.  */
3004
3005 static int
3006 detect_coding_iso_2022 (struct coding_system *coding,
3007                         struct coding_detection_info *detect_info)
3008 {
3009   const unsigned char *src = coding->source, *src_base = src;
3010   const unsigned char *src_end = coding->source + coding->src_bytes;
3011   int multibytep = coding->src_multibyte;
3012   int single_shifting = 0;
3013   int id;
3014   int c, c1;
3015   EMACS_INT consumed_chars = 0;
3016   int i;
3017   int rejected = 0;
3018   int found = 0;
3019   int composition_count = -1;
3020
3021   detect_info->checked |= CATEGORY_MASK_ISO;
3022
3023   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3024     {
3025       struct coding_system *this = &(coding_categories[i]);
3026       Lisp_Object attrs, val;
3027
3028       if (this->id < 0)
3029         continue;
3030       attrs = CODING_ID_ATTRS (this->id);
3031       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3032           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3033         setup_iso_safe_charsets (attrs);
3034       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3035       this->max_charset_id = SCHARS (val) - 1;
3036       this->safe_charsets = SDATA (val);
3037     }
3038
3039   /* A coding system of this category is always ASCII compatible.  */
3040   src += coding->head_ascii;
3041
3042   while (rejected != CATEGORY_MASK_ISO)
3043     {
3044       src_base = src;
3045       ONE_MORE_BYTE (c);
3046       switch (c)
3047         {
3048         case ISO_CODE_ESC:
3049           if (inhibit_iso_escape_detection)
3050             break;
3051           single_shifting = 0;
3052           ONE_MORE_BYTE (c);
3053           if (c == 'N' || c == 'O')
3054             {
3055               /* ESC <Fe> for SS2 or SS3.  */
3056               single_shifting = 1;
3057               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3058             }
3059           else if (c == '1')
3060             {
3061               /* End of composition.  */
3062               if (composition_count < 0
3063                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3064                 /* Invalid */
3065                 break;
3066               composition_count = -1;
3067               found |= CATEGORY_MASK_ISO;
3068             }
3069           else if (c >= '0' && c <= '4')
3070             {
3071               /* ESC <Fp> for start/end composition.  */
3072               composition_count = 0;
3073             }
3074           else
3075             {
3076               if (c >= '(' && c <= '/')
3077                 {
3078                   /* Designation sequence for a charset of dimension 1.  */
3079                   ONE_MORE_BYTE (c1);
3080                   if (c1 < ' ' || c1 >= 0x80
3081                       || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3082                     /* Invalid designation sequence.  Just ignore.  */
3083                     break;
3084                 }
3085               else if (c == '$')
3086                 {
3087                   /* Designation sequence for a charset of dimension 2.  */
3088                   ONE_MORE_BYTE (c);
3089                   if (c >= '@' && c <= 'B')
3090                     /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3091                     id = iso_charset_table[1][0][c];
3092                   else if (c >= '(' && c <= '/')
3093                     {
3094                       ONE_MORE_BYTE (c1);
3095                       if (c1 < ' ' || c1 >= 0x80
3096                           || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3097                         /* Invalid designation sequence.  Just ignore.  */
3098                         break;
3099                     }
3100                   else
3101                     /* Invalid designation sequence.  Just ignore it.  */
3102                     break;
3103                 }
3104               else
3105                 {
3106                   /* Invalid escape sequence.  Just ignore it.  */
3107                   break;
3108                 }
3109
3110               /* We found a valid designation sequence for CHARSET.  */
3111               rejected |= CATEGORY_MASK_ISO_8BIT;
3112               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3113                                   id))
3114                 found |= CATEGORY_MASK_ISO_7;
3115               else
3116                 rejected |= CATEGORY_MASK_ISO_7;
3117               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3118                                   id))
3119                 found |= CATEGORY_MASK_ISO_7_TIGHT;
3120               else
3121                 rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3122               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3123                                   id))
3124                 found |= CATEGORY_MASK_ISO_7_ELSE;
3125               else
3126                 rejected |= CATEGORY_MASK_ISO_7_ELSE;
3127               if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3128                                   id))
3129                 found |= CATEGORY_MASK_ISO_8_ELSE;
3130               else
3131                 rejected |= CATEGORY_MASK_ISO_8_ELSE;
3132             }
3133           break;
3134
3135         case ISO_CODE_SO:
3136         case ISO_CODE_SI:
3137           /* Locking shift out/in.  */
3138           if (inhibit_iso_escape_detection)
3139             break;
3140           single_shifting = 0;
3141           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3142           break;
3143
3144         case ISO_CODE_CSI:
3145           /* Control sequence introducer.  */
3146           single_shifting = 0;
3147           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3148           found |= CATEGORY_MASK_ISO_8_ELSE;
3149           goto check_extra_latin;
3150
3151         case ISO_CODE_SS2:
3152         case ISO_CODE_SS3:
3153           /* Single shift.   */
3154           if (inhibit_iso_escape_detection)
3155             break;
3156           single_shifting = 0;
3157           rejected |= CATEGORY_MASK_ISO_7BIT;
3158           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3159               & CODING_ISO_FLAG_SINGLE_SHIFT)
3160             {
3161               found |= CATEGORY_MASK_ISO_8_1;
3162               single_shifting = 1;
3163             }
3164           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3165               & CODING_ISO_FLAG_SINGLE_SHIFT)
3166             {
3167               found |= CATEGORY_MASK_ISO_8_2;
3168               single_shifting = 1;
3169             }
3170           if (single_shifting)
3171             break;
3172         check_extra_latin:
3173           if (! VECTORP (Vlatin_extra_code_table)
3174               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3175             {
3176               rejected = CATEGORY_MASK_ISO;
3177               break;
3178             }
3179           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3180               & CODING_ISO_FLAG_LATIN_EXTRA)
3181             found |= CATEGORY_MASK_ISO_8_1;
3182           else
3183             rejected |= CATEGORY_MASK_ISO_8_1;
3184           rejected |= CATEGORY_MASK_ISO_8_2;
3185           break;
3186
3187         default:
3188           if (c < 0)
3189             continue;
3190           if (c < 0x80)
3191             {
3192               if (composition_count >= 0)
3193                 composition_count++;
3194               single_shifting = 0;
3195               break;
3196             }
3197           if (c >= 0xA0)
3198             {
3199               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3200               found |= CATEGORY_MASK_ISO_8_1;
3201               /* Check the length of succeeding codes of the range
3202                  0xA0..0xFF.  If the byte length is even, we include
3203                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3204                  only when we are not single shifting.  */
3205               if (! single_shifting
3206                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3207                 {
3208                   int len = 1;
3209                   while (src < src_end)
3210                     {
3211                       src_base = src;
3212                       ONE_MORE_BYTE (c);
3213                       if (c < 0xA0)
3214                         {
3215                           src = src_base;
3216                           break;
3217                         }
3218                       len++;
3219                     }
3220
3221                   if (len & 1 && src < src_end)
3222                     {
3223                       rejected |= CATEGORY_MASK_ISO_8_2;
3224                       if (composition_count >= 0)
3225                         composition_count += len;
3226                     }
3227                   else
3228                     {
3229                       found |= CATEGORY_MASK_ISO_8_2;
3230                       if (composition_count >= 0)
3231                         composition_count += len / 2;
3232                     }
3233                 }
3234               break;
3235             }
3236         }
3237     }
3238   detect_info->rejected |= CATEGORY_MASK_ISO;
3239   return 0;
3240
3241  no_more_source:
3242   detect_info->rejected |= rejected;
3243   detect_info->found |= (found & ~rejected);
3244   return 1;
3245 }
3246
3247
/* Record in CODING that the charset selected by the final byte FINAL
   (looked up in ISO_CHARSET_TABLE with DIM and CHARS_96) is designated
   to graphic register REG.

   If FINAL is outside '0'..127, selects no charset, or selects a
   charset that is not safe for CODING, the register is marked with -2
   and CHARS_96 is set to -1.  CHARS_96 is also set to -1 when ASCII is
   designated to a register that was previously marked -2.  A CHARS_96
   of -1 tells the caller that the escape sequence should be kept.

   CHARS_96 must be an lvalue.  `coding' is taken from the expansion
   site.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int desig_id = -1;                                                  \
                                                                        \
    if ((final) >= '0' && (final) < 128)                                \
      desig_id = ISO_CHARSET_TABLE (dim, chars_96, final);              \
    if (desig_id < 0 || ! SAFE_CHARSET_P (coding, desig_id))            \
      {                                                                 \
        /* Unusable designation: remember that on the register.  */     \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        (chars_96) = -1;                                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        int prev_id = CODING_ISO_DESIGNATION (coding, reg);             \
                                                                        \
        /* Substitute the charset when CODING asks for it.  */          \
        if (desig_id == charset_jisx0201_roman)                         \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)  \
              desig_id = charset_ascii;                                 \
          }                                                             \
        else if (desig_id == charset_jisx0208_1978)                     \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
              desig_id = charset_jisx0208;                              \
          }                                                             \
        CODING_ISO_DESIGNATION (coding, reg) = desig_id;                \
        /* ASCII designated right after an invalid designation to the   \
           same register: this designation sequence must be kept.  */   \
        if (prev_id == -2 && desig_id == charset_ascii)                 \
          (chars_96) = -1;                                              \
      }                                                                 \
  } while (0)
3280
3281
3282 /* Handle these composition sequences (ALT: alternate char):
3283
3284    (1) relative composition: ESC 0 CHAR ... ESC 1
3285    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3286    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3287    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3288
3289    When the start sequence (ESC 0/2/3/4) is found, this annotation
3290    header is produced.
3291
3292         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3293
3294    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3295    produced until the end sequence (ESC 1) is found:
3296
3297    (1) CHAR ... CHAR
3298    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3299    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3300    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3301
3302    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3303    annotation header are updated as below:
3304
3305    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3306    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3307    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3308    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3309
3310    If an error is found while composing, the annotation header is
3311    changed to:
3312
3313         [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3314
3315    and the sequence [ -2 DECODED-RULE ] is changed to the original
3316    byte sequence as below:
3317         o the original byte sequence is B: [ B -1 ]
3318         o the original byte sequence is B1 B2: [ B1 B2 ]
3319    and the sequence [ -1 -1 ] is changed to the original byte
3320    sequence:
3321         [ ESC '0' ]
3322 */
3323
/* Decode the composition rule whose first byte is C1 (a variable of the
   expansion site), reading one more source byte when the rule uses the
   two-byte format, and store the encoded rule in RULE.  Jump to
   invalid_code if the rule is invalid.  RULE must be an lvalue.  */

#define DECODE_COMPOSITION_RULE(rule)                                   \
  do {                                                                  \
    (rule) = c1 - 32;                                                   \
    if ((rule) < 0)                                                     \
      goto invalid_code;                                                \
    if ((rule) >= 81)           /* new format (after ver.21) */         \
      {                                                                 \
        int next_byte;                                                  \
                                                                        \
        ONE_MORE_BYTE (next_byte);                                      \
        if (! COMPOSITION_ENCODE_RULE_VALID ((rule) - 81,               \
                                             next_byte - 32))           \
          goto invalid_code;                                            \
        /* The 0x100 offset distinguishes it from the old format.  */   \
        (rule) = 0x100 + COMPOSITION_ENCODE_RULE ((rule) - 81,          \
                                                  next_byte - 32);      \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int old_gref = (rule) / 9;                                      \
        int old_nref = (rule) % 9;                                      \
                                                                        \
        /* Reference point 4 of the old format is encoded as 10.  */    \
        (rule) = COMPOSITION_ENCODE_RULE ((old_gref == 4 ? 10 : old_gref), \
                                          (old_nref == 4 ? 10 : old_nref)); \
      }                                                                 \
  } while (0)
3352
/* Turn the decoded composition rule RULE back into the bytes it was
   read from, storing them at charbuf[idx] and charbuf[idx + 1], and add
   the number of recovered bytes to new_chars.

   A rule below 0x100 is in the old one-byte format: the byte is stored
   followed by -1.  Otherwise the rule is in the new two-byte format.

   `charbuf', `idx' and `new_chars' are taken from the expansion
   site.  */
#define ENCODE_COMPOSITION_RULE(rule)                           \
  do {                                                          \
    int encoded_rule = (rule);                                  \
    int ref_pair = encoded_rule % 0x100;                        \
    int gref = ref_pair / 12;                                   \
    int nref = ref_pair % 12;                                   \
                                                                \
    if (encoded_rule >= 0x100)          /* new format */        \
      {                                                         \
        charbuf[idx] = 32 + 81 + gref;                          \
        charbuf[idx + 1] = 32 + nref;                           \
        new_chars += 2;                                         \
      }                                                         \
    else                                /* old format */        \
      {                                                         \
        if (gref == 10)                                         \
          gref = 4;                                             \
        if (nref == 10)                                         \
          nref = 4;                                             \
        charbuf[idx] = 32 + gref * 9 + nref;                    \
        charbuf[idx + 1] = -1;                                  \
        new_chars++;                                            \
      }                                                         \
  } while (0)
3372
3373 /* Finish the current composition as invalid.  */
3374
3375 static int finish_composition (int *, struct composition_status *);
3376
3377 static int
3378 finish_composition (int *charbuf, struct composition_status *cmp_status)
3379 {
3380   int idx = - cmp_status->length;
3381   int new_chars;
3382
3383   /* Recover the original ESC sequence */
3384   charbuf[idx++] = ISO_CODE_ESC;
3385   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3386                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3387                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3388                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3389                     : '4');
3390   charbuf[idx++] = -2;
3391   charbuf[idx++] = 0;
3392   charbuf[idx++] = -1;
3393   new_chars = cmp_status->nchars;
3394   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3395     for (; idx < 0; idx++)
3396       {
3397         int elt = charbuf[idx];
3398
3399         if (elt == -2)
3400           {
3401             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3402             idx++;
3403           }
3404         else if (elt == -1)
3405           {
3406             charbuf[idx++] = ISO_CODE_ESC;
3407             charbuf[idx] = '0';
3408             new_chars += 2;
3409           }
3410       }
3411   cmp_status->state = COMPOSING_NO;
3412   return new_chars;
3413 }
3414
/* If a composition is in progress (cmp_status->state is not
   COMPOSING_NO), finish it as invalid and advance char_offset by the
   count finish_composition returns.  `charbuf', `cmp_status' and
   `char_offset' are taken from the expansion site.  */
#define MAYBE_FINISH_COMPOSITION()                                      \
  do {                                                                  \
    if (cmp_status->state == COMPOSING_NO)                              \
      break;                                                            \
    char_offset += finish_composition (charbuf, cmp_status);            \
  } while (0)
3421
/* Handle the composition start sequence ESC C1, C1 being '0', '2', '3'
   or '4':

   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   ESC 0 does not start a new composition when the method is
   COMPOSITION_WITH_ALTCHARS and the state is COMPOSING_COMPONENT_CHAR,
   or when the method is COMPOSITION_WITH_RULE_ALTCHARS and the state is
   COMPOSING_COMPONENT_RULE.  In that case only the marker [ -1 -1 ] is
   stored and the state becomes COMPOSING_CHAR.

   In every other case any composition in progress is finished as
   invalid, and a new one is started by producing this annotation
   header with ADD_COMPOSITION_DATA:

        [ -LENGTH CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]

   The length recorded in cmp_status is then MAX_ANNOTATION_LENGTH.
   `charbuf', `cmp_status', `char_offset' and `coding' are taken from
   the expansion site.  */

#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    if (! ((c1) == '0'                                                     \
           && ((cmp_status->method == COMPOSITION_WITH_ALTCHARS            \
                && cmp_status->state == COMPOSING_COMPONENT_CHAR)          \
               || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS    \
                   && cmp_status->state == COMPOSING_COMPONENT_RULE))))    \
      {                                                                    \
        /* Start a new composition.  */                                    \
        MAYBE_FINISH_COMPOSITION ();                                       \
        switch (c1)                                                        \
          {                                                                \
          case '0':                                                        \
            cmp_status->method = COMPOSITION_RELATIVE;                     \
            break;                                                         \
          case '2':                                                        \
            cmp_status->method = COMPOSITION_WITH_RULE;                    \
            break;                                                         \
          case '3':                                                        \
            cmp_status->method = COMPOSITION_WITH_ALTCHARS;                \
            break;                                                         \
          default:                                                         \
            cmp_status->method = COMPOSITION_WITH_RULE_ALTCHARS;           \
            break;                                                         \
          }                                                                \
        if ((c1) > '2')                                                    \
          cmp_status->state = COMPOSING_COMPONENT_CHAR;                    \
        else                                                               \
          cmp_status->state = COMPOSING_CHAR;                              \
        ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
        cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
        cmp_status->ncomps = 0;                                            \
        cmp_status->nchars = 0;                                            \
        coding->annotated = 1;                                             \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        /* End of the components; the composed chars follow.  */           \
        charbuf[0] = -1;                                                   \
        charbuf[1] = -1;                                                   \
        charbuf += 2;                                                      \
        cmp_status->length += 2;                                           \
        cmp_status->state = COMPOSING_CHAR;                                \
      }                                                                    \
  } while (0)
3462
3463
/* Handle the composition end sequence ESC 1.

   The sequence is rejected -- the composition is finished as invalid
   and control jumps to invalid_code -- when no composed character has
   been stored, or when "the state is COMPOSING_CHAR" and "the method is
   COMPOSITION_WITH_RULE" are both true or both false.

   Otherwise the annotation header is completed: for the two ALTCHARS
   methods its (negative) length element is decreased further to
   account for the components, and its NCHARS element is set.  Then
   char_offset is advanced by the number of composed characters and the
   state returns to COMPOSING_NO.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int *cmp_header;                                                    \
    int at_char = cmp_status->state == COMPOSING_CHAR;                  \
    int rule_based = cmp_status->method == COMPOSITION_WITH_RULE;       \
                                                                        \
    if (cmp_status->nchars == 0 || at_char == rule_based)               \
      {                                                                 \
        MAYBE_FINISH_COMPOSITION ();                                    \
        goto invalid_code;                                              \
      }                                                                 \
    cmp_header = charbuf - cmp_status->length;                          \
    if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
      cmp_header[0] -= cmp_status->ncomps + 2;                          \
    else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
      cmp_header[0] -= cmp_status->ncomps * 3;                          \
    cmp_header[2] = cmp_status->nchars;                                 \
    char_offset += cmp_status->nchars;                                  \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
3483
/* Append the pair [ -2 RULE ] to charbuf, add 2 to the recorded length
   of the composition data, and step cmp_status->state back by one.
   `charbuf' and `cmp_status' are taken from the expansion site.  */

#define STORE_COMPOSITION_RULE(rule)    \
  do {                                  \
    charbuf[0] = -2;                    \
    charbuf[1] = (rule);                \
    charbuf += 2;                       \
    cmp_status->length += 2;            \
    cmp_status->state--;                \
  } while (0)
3493
/* Append C, a composed char or a component char, to charbuf and update
   cmp_status: the recorded length grows by one, and either nchars (in
   the COMPOSING_CHAR state) or ncomps (in any other state) is counted
   up.  The state is then advanced by one when the method is
   COMPOSITION_WITH_RULE, or when it is COMPOSITION_WITH_RULE_ALTCHARS
   and the state is COMPOSING_COMPONENT_CHAR.  */

#define STORE_COMPOSITION_CHAR(c)                                       \
  do {                                                                  \
    *charbuf++ = (c);                                                   \
    cmp_status->length++;                                               \
    if (cmp_status->state != COMPOSING_CHAR)                            \
      cmp_status->ncomps++;                                             \
    else                                                                \
      cmp_status->nchars++;                                             \
    if (cmp_status->method == COMPOSITION_WITH_RULE)                    \
      cmp_status->state++;                                              \
    else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS       \
             && cmp_status->state == COMPOSING_COMPONENT_CHAR)          \
      cmp_status->state++;                                              \
  } while (0)
3510
3511
3512 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3513
3514 static void
3515 decode_coding_iso_2022 (struct coding_system *coding)
3516 {
3517   const unsigned char *src = coding->source + coding->consumed;
3518   const unsigned char *src_end = coding->source + coding->src_bytes;
3519   const unsigned char *src_base;
3520   int *charbuf = coding->charbuf + coding->charbuf_used;
3521   /* We may produce two annotations (charset and composition) in one
3522      loop and one more charset annotation at the end.  */
3523   int *charbuf_end
3524     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3525   EMACS_INT consumed_chars = 0, consumed_chars_base;
3526   int multibytep = coding->src_multibyte;
3527   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3528   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3529   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3530   int charset_id_2, charset_id_3;
3531   struct charset *charset;
3532   int c;
3533   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3534   Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
3535   EMACS_INT char_offset = coding->produced_char;
3536   EMACS_INT last_offset = char_offset;
3537   int last_id = charset_ascii;
3538   int eol_dos =
3539     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3540   int byte_after_cr = -1;
3541   int i;
3542
3543   setup_iso_safe_charsets (attrs);
3544   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3545
3546   if (cmp_status->state != COMPOSING_NO)
3547     {
3548       if (charbuf_end - charbuf < cmp_status->length)
3549         abort ();
3550       for (i = 0; i < cmp_status->length; i++)
3551         *charbuf++ = cmp_status->carryover[i];
3552       coding->annotated = 1;
3553     }
3554
3555   while (1)
3556     {
3557       int c1, c2, c3;
3558
3559       src_base = src;
3560       consumed_chars_base = consumed_chars;
3561
3562       if (charbuf >= charbuf_end)
3563         {
3564           if (byte_after_cr >= 0)
3565             src_base--;
3566           break;
3567         }
3568
3569       if (byte_after_cr >= 0)
3570         c1 = byte_after_cr, byte_after_cr = -1;
3571       else
3572         ONE_MORE_BYTE (c1);
3573       if (c1 < 0)
3574         goto invalid_code;
3575
3576       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3577         {
3578           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3579           char_offset++;
3580           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3581           continue;
3582         }
3583
3584       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3585         {
3586           if (c1 == ISO_CODE_ESC)
3587             {
3588               if (src + 1 >= src_end)
3589                 goto no_more_source;
3590               *charbuf++ = ISO_CODE_ESC;
3591               char_offset++;
3592               if (src[0] == '%' && src[1] == '@')
3593                 {
3594                   src += 2;
3595                   consumed_chars += 2;
3596                   char_offset += 2;
3597                   /* We are sure charbuf can contain two more chars. */
3598                   *charbuf++ = '%';
3599                   *charbuf++ = '@';
3600                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3601                 }
3602             }
3603           else
3604             {
3605               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3606               char_offset++;
3607             }
3608           continue;
3609         }
3610
3611       if ((cmp_status->state == COMPOSING_RULE
3612            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3613           && c1 != ISO_CODE_ESC)
3614         {
3615           int rule;
3616
3617           DECODE_COMPOSITION_RULE (rule);
3618           STORE_COMPOSITION_RULE (rule);
3619           continue;
3620         }
3621
3622       /* We produce at most one character.  */
3623       switch (iso_code_class [c1])
3624         {
3625         case ISO_0x20_or_0x7F:
3626           if (charset_id_0 < 0
3627               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3628             /* This is SPACE or DEL.  */
3629             charset = CHARSET_FROM_ID (charset_ascii);
3630           else
3631             charset = CHARSET_FROM_ID (charset_id_0);
3632           break;
3633
3634         case ISO_graphic_plane_0:
3635           if (charset_id_0 < 0)
3636             charset = CHARSET_FROM_ID (charset_ascii);
3637           else
3638             charset = CHARSET_FROM_ID (charset_id_0);
3639           break;
3640
3641         case ISO_0xA0_or_0xFF:
3642           if (charset_id_1 < 0
3643               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3644               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3645             goto invalid_code;
3646           /* This is a graphic character, we fall down ... */
3647
3648         case ISO_graphic_plane_1:
3649           if (charset_id_1 < 0)
3650             goto invalid_code;
3651           charset = CHARSET_FROM_ID (charset_id_1);
3652           break;
3653
3654         case ISO_control_0:
3655           if (eol_dos && c1 == '\r')
3656             ONE_MORE_BYTE (byte_after_cr);
3657           MAYBE_FINISH_COMPOSITION ();
3658           charset = CHARSET_FROM_ID (charset_ascii);
3659           break;
3660
3661         case ISO_control_1:
3662           goto invalid_code;
3663
3664         case ISO_shift_out:
3665           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3666               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3667             goto invalid_code;
3668           CODING_ISO_INVOCATION (coding, 0) = 1;
3669           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3670           continue;
3671
3672         case ISO_shift_in:
3673           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3674             goto invalid_code;
3675           CODING_ISO_INVOCATION (coding, 0) = 0;
3676           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3677           continue;
3678
3679         case ISO_single_shift_2_7:
3680           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3681             goto invalid_code;
3682         case ISO_single_shift_2:
3683           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3684             goto invalid_code;
3685           /* SS2 is handled as an escape sequence of ESC 'N' */
3686           c1 = 'N';
3687           goto label_escape_sequence;
3688
3689         case ISO_single_shift_3:
3690           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3691             goto invalid_code;
3692           /* SS3 is handled as an escape sequence of ESC 'O' */
3693           c1 = 'O';
3694           goto label_escape_sequence;
3695
3696         case ISO_control_sequence_introducer:
3697           /* CSI is handled as an escape sequence of ESC '[' ...  */
3698           c1 = '[';
3699           goto label_escape_sequence;
3700
3701         case ISO_escape:
3702           ONE_MORE_BYTE (c1);
3703         label_escape_sequence:
3704           /* Escape sequences handled here are invocation,
3705              designation, direction specification, and character
3706              composition specification.  */
3707           switch (c1)
3708             {
3709             case '&':           /* revision of following character set */
3710               ONE_MORE_BYTE (c1);
3711               if (!(c1 >= '@' && c1 <= '~'))
3712                 goto invalid_code;
3713               ONE_MORE_BYTE (c1);
3714               if (c1 != ISO_CODE_ESC)
3715                 goto invalid_code;
3716               ONE_MORE_BYTE (c1);
3717               goto label_escape_sequence;
3718
3719             case '$':           /* designation of 2-byte character set */
3720               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3721                 goto invalid_code;
3722               {
3723                 int reg, chars96;
3724
3725                 ONE_MORE_BYTE (c1);
3726                 if (c1 >= '@' && c1 <= 'B')
3727                   {     /* designation of JISX0208.1978, GB2312.1980,
3728                            or JISX0208.1980 */
3729                     reg = 0, chars96 = 0;
3730                   }
3731                 else if (c1 >= 0x28 && c1 <= 0x2B)
3732                   { /* designation of DIMENSION2_CHARS94 character set */
3733                     reg = c1 - 0x28, chars96 = 0;
3734                     ONE_MORE_BYTE (c1);
3735                   }
3736                 else if (c1 >= 0x2C && c1 <= 0x2F)
3737                   { /* designation of DIMENSION2_CHARS96 character set */
3738                     reg = c1 - 0x2C, chars96 = 1;
3739                     ONE_MORE_BYTE (c1);
3740                   }
3741                 else
3742                   goto invalid_code;
3743                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3744                 /* We must update these variables now.  */
3745                 if (reg == 0)
3746                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3747                 else if (reg == 1)
3748                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3749                 if (chars96 < 0)
3750                   goto invalid_code;
3751               }
3752               continue;
3753
3754             case 'n':           /* invocation of locking-shift-2 */
3755               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3756                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3757                 goto invalid_code;
3758               CODING_ISO_INVOCATION (coding, 0) = 2;
3759               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3760               continue;
3761
3762             case 'o':           /* invocation of locking-shift-3 */
3763               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3764                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3765                 goto invalid_code;
3766               CODING_ISO_INVOCATION (coding, 0) = 3;
3767               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3768               continue;
3769
3770             case 'N':           /* invocation of single-shift-2 */
3771               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3772                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3773                 goto invalid_code;
3774               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3775               if (charset_id_2 < 0)
3776                 charset = CHARSET_FROM_ID (charset_ascii);
3777               else
3778                 charset = CHARSET_FROM_ID (charset_id_2);
3779               ONE_MORE_BYTE (c1);
3780               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3781                 goto invalid_code;
3782               break;
3783
3784             case 'O':           /* invocation of single-shift-3 */
3785               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3786                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3787                 goto invalid_code;
3788               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3789               if (charset_id_3 < 0)
3790                 charset = CHARSET_FROM_ID (charset_ascii);
3791               else
3792                 charset = CHARSET_FROM_ID (charset_id_3);
3793               ONE_MORE_BYTE (c1);
3794               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3795                 goto invalid_code;
3796               break;
3797
3798             case '0': case '2': case '3': case '4': /* start composition */
3799               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3800                 goto invalid_code;
3801               if (last_id != charset_ascii)
3802                 {
3803                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3804                   last_id = charset_ascii;
3805                   last_offset = char_offset;
3806                 }
3807               DECODE_COMPOSITION_START (c1);
3808               continue;
3809
3810             case '1':           /* end composition */
3811               if (cmp_status->state == COMPOSING_NO)
3812                 goto invalid_code;
3813               DECODE_COMPOSITION_END ();
3814               continue;
3815
3816             case '[':           /* specification of direction */
3817               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3818                 goto invalid_code;
3819               /* For the moment, nested direction is not supported.
3820                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3821                  left-to-right, and nonzero means right-to-left.  */
3822               ONE_MORE_BYTE (c1);
3823               switch (c1)
3824                 {
3825                 case ']':       /* end of the current direction */
3826                   coding->mode &= ~CODING_MODE_DIRECTION;
3827
3828                 case '0':       /* end of the current direction */
3829                 case '1':       /* start of left-to-right direction */
3830                   ONE_MORE_BYTE (c1);
3831                   if (c1 == ']')
3832                     coding->mode &= ~CODING_MODE_DIRECTION;
3833                   else
3834                     goto invalid_code;
3835                   break;
3836
3837                 case '2':       /* start of right-to-left direction */
3838                   ONE_MORE_BYTE (c1);
3839                   if (c1 == ']')
3840                     coding->mode |= CODING_MODE_DIRECTION;
3841                   else
3842                     goto invalid_code;
3843                   break;
3844
3845                 default:
3846                   goto invalid_code;
3847                 }
3848               continue;
3849
3850             case '%':
3851               ONE_MORE_BYTE (c1);
3852               if (c1 == '/')
3853                 {
3854                   /* CTEXT extended segment:
3855                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3856                      We keep these bytes as is for the moment.
3857                      They may be decoded by post-read-conversion.  */
3858                   int dim, M, L;
3859                   int size;
3860
3861                   ONE_MORE_BYTE (dim);
3862                   if (dim < '0' || dim > '4')
3863                     goto invalid_code;
3864                   ONE_MORE_BYTE (M);
3865                   if (M < 128)
3866                     goto invalid_code;
3867                   ONE_MORE_BYTE (L);
3868                   if (L < 128)
3869                     goto invalid_code;
3870                   size = ((M - 128) * 128) + (L - 128);
3871                   if (charbuf + 6 > charbuf_end)
3872                     goto break_loop;
3873                   *charbuf++ = ISO_CODE_ESC;
3874                   *charbuf++ = '%';
3875                   *charbuf++ = '/';
3876                   *charbuf++ = dim;
3877                   *charbuf++ = BYTE8_TO_CHAR (M);
3878                   *charbuf++ = BYTE8_TO_CHAR (L);
3879                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3880                 }
3881               else if (c1 == 'G')
3882                 {
3883                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3884                      ESC % G --UTF-8-BYTES-- ESC % @
3885                      We keep these bytes as is for the moment.
3886                      They may be decoded by post-read-conversion.  */
3887                   if (charbuf + 3 > charbuf_end)
3888                     goto break_loop;
3889                   *charbuf++ = ISO_CODE_ESC;
3890                   *charbuf++ = '%';
3891                   *charbuf++ = 'G';
3892                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3893                 }
3894               else
3895                 goto invalid_code;
3896               continue;
3897               break;
3898
3899             default:
3900               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3901                 goto invalid_code;
3902               {
3903                 int reg, chars96;
3904
3905                 if (c1 >= 0x28 && c1 <= 0x2B)
3906                   { /* designation of DIMENSION1_CHARS94 character set */
3907                     reg = c1 - 0x28, chars96 = 0;
3908                     ONE_MORE_BYTE (c1);
3909                   }
3910                 else if (c1 >= 0x2C && c1 <= 0x2F)
3911                   { /* designation of DIMENSION1_CHARS96 character set */
3912                     reg = c1 - 0x2C, chars96 = 1;
3913                     ONE_MORE_BYTE (c1);
3914                   }
3915                 else
3916                   goto invalid_code;
3917                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3918                 /* We must update these variables now.  */
3919                 if (reg == 0)
3920                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3921                 else if (reg == 1)
3922                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3923                 if (chars96 < 0)
3924                   goto invalid_code;
3925               }
3926               continue;
3927             }
3928           break;
3929
3930         default:
3931           abort ();
3932         }
3933
3934       if (cmp_status->state == COMPOSING_NO
3935           && charset->id != charset_ascii
3936           && last_id != charset->id)
3937         {
3938           if (last_id != charset_ascii)
3939             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3940           last_id = charset->id;
3941           last_offset = char_offset;
3942         }
3943
3944       /* Now we know CHARSET and 1st position code C1 of a character.
3945          Produce a decoded character while getting 2nd and 3rd
3946          position codes C2, C3 if necessary.  */
3947       if (CHARSET_DIMENSION (charset) > 1)
3948         {
3949           ONE_MORE_BYTE (c2);
3950           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3951               || ((c1 & 0x80) != (c2 & 0x80)))
3952             /* C2 is not in a valid range.  */
3953             goto invalid_code;
3954           if (CHARSET_DIMENSION (charset) == 2)
3955             c1 = (c1 << 8) | c2;
3956           else
3957             {
3958               ONE_MORE_BYTE (c3);
3959               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3960                   || ((c1 & 0x80) != (c3 & 0x80)))
3961                 /* C3 is not in a valid range.  */
3962                 goto invalid_code;
3963               c1 = (c1 << 16) | (c2 << 8) | c2;
3964             }
3965         }
3966       c1 &= 0x7F7F7F;
3967       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3968       if (c < 0)
3969         {
3970           MAYBE_FINISH_COMPOSITION ();
3971           for (; src_base < src; src_base++, char_offset++)
3972             {
3973               if (ASCII_BYTE_P (*src_base))
3974                 *charbuf++ = *src_base;
3975               else
3976                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3977             }
3978         }
3979       else if (cmp_status->state == COMPOSING_NO)
3980         {
3981           *charbuf++ = c;
3982           char_offset++;
3983         }
3984       else if ((cmp_status->state == COMPOSING_CHAR
3985                 ? cmp_status->nchars
3986                 : cmp_status->ncomps)
3987                >= MAX_COMPOSITION_COMPONENTS)
3988         {
3989           /* Too long composition.  */
3990           MAYBE_FINISH_COMPOSITION ();
3991           *charbuf++ = c;
3992           char_offset++;
3993         }
3994       else
3995         STORE_COMPOSITION_CHAR (c);
3996       continue;
3997
3998     invalid_code:
3999       MAYBE_FINISH_COMPOSITION ();
4000       src = src_base;
4001       consumed_chars = consumed_chars_base;
4002       ONE_MORE_BYTE (c);
4003       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4004       char_offset++;
4005       coding->errors++;
4006       continue;
4007
4008     break_loop:
4009       break;
4010     }
4011
4012  no_more_source:
4013   if (cmp_status->state != COMPOSING_NO)
4014     {
4015       if (coding->mode & CODING_MODE_LAST_BLOCK)
4016         MAYBE_FINISH_COMPOSITION ();
4017       else
4018         {
4019           charbuf -= cmp_status->length;
4020           for (i = 0; i < cmp_status->length; i++)
4021             cmp_status->carryover[i] = charbuf[i];
4022         }
4023     }
4024   else if (last_id != charset_ascii)
4025     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4026   coding->consumed_char += consumed_chars_base;
4027   coding->consumed = src_base - coding->source;
4028   coding->charbuf_used = charbuf - coding->charbuf;
4029 }
4030
4031
4032 /* ISO2022 encoding stuff.  */
4033
4034 /*
4035    It is not enough to say just "ISO2022" on encoding, we have to
4036    specify more details.  In Emacs, each coding system of ISO2022
4037    variant has the following specifications:
4038         1. Initial designation to G0 thru G3.
4039         2. Allows short-form designation?
4040         3. ASCII should be designated to G0 before control characters?
4041         4. ASCII should be designated to G0 at end of line?
4042         5. 7-bit environment or 8-bit environment?
4043         6. Use locking-shift?
4044         7. Use Single-shift?
4045    And the following two are only for Japanese:
4046         8. Use ASCII in place of JIS0201-1976-Roman?
4047         9. Use JISX0208-1983 in place of JISX0208-1978?
4048    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4049    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4050    details.
4051 */
4052
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, and record the new designation in CODING.

   The sequence is, in order: an optional revision announcement
   (ESC '&' '@'+REVISION, only when CODING has
   CODING_ISO_FLAG_REVISION set and CHARSET has a non-negative
   revision), ESC, '$' when the dimension of CHARSET is not 1, an
   intermediate byte selecting REG ("()*+" for 94-charsets, ",-./"
   for 96-charsets), and the final byte of CHARSET.

   The intermediate byte is omitted (short-form) only for a
   multi-dimension 94-charset designated to register 0 whose final
   byte is '@', 'A', or 'B', and only when CODING does not have
   CODING_ISO_FLAG_LONG_FORM set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_byte = CHARSET_ISO_FINAL (charset);             \
    /* Intermediate bytes, indexed by graphic register number.  */      \
    const char *intermediates                                           \
      = CHARSET_ISO_CHARS_96 (charset) ? ",-./" : "()*+";               \
                                                                        \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
      {                                                                 \
        int rev = CHARSET_ISO_REVISION (charset);                       \
                                                                        \
        if (rev >= 0)                                                   \
          {                                                             \
            EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                   \
            EMIT_ONE_BYTE ('@' + rev);                                  \
          }                                                             \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        /* Skip the intermediate byte only in the short-form case.  */  \
        if (CHARSET_ISO_CHARS_96 (charset)                              \
            || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM    \
            || reg != 0                                                 \
            || final_byte < '@' || final_byte > 'B')                    \
          EMIT_ONE_ASCII_BYTE (intermediates[reg]);                     \
      }                                                                 \
    else                                                                \
      EMIT_ONE_ASCII_BYTE (intermediates[reg]);                         \
    EMIT_ONE_ASCII_BYTE (final_byte);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4100
4101
/* The following two macros produce the code for an ISO2022
   single-shift function (single-shift-2 and single-shift-3): the
   escape sequence ESC 'N' / ESC 'O' when CODING has
   CODING_ISO_FLAG_SEVEN_BITS set, otherwise the single control byte
   ISO_CODE_SS2 / ISO_CODE_SS3.  Both also mark CODING as being in
   the single-shifting state.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4114
4115
/* Single-shift-3: ESC 'O' when CODING has CODING_ISO_FLAG_SEVEN_BITS
   set, the control byte ISO_CODE_SS3 otherwise.  CODING is marked as
   being in the single-shifting state.  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4124
4125
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one emits
   the code and then records in CODING which graphic register is now
   invoked to graphic plane 0:

     shift-in          ISO_CODE_SI   register 0
     shift-out         ISO_CODE_SO   register 1
     locking-shift-2   ESC 'n'       register 2
     locking-shift-3   ESC 'o'       register 3

   These are the same escape sequences that decode_coding_iso_2022
   accepts for locking-shift-2 and locking-shift-3.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* The final byte must be 'o' here; 'n' is locking-shift-2 and would
   make a reader invoke register 2 while CODING records register 3.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
4156
4157
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has CODING_ISO_FLAG_USE_ROMAN
   set and CHARSET is ASCII, it is replaced by JISX0201-Roman.  C1 may
   be any integer expression; it is parenthesized at each use.

   The body is a loop: if CHARSET is neither being single-shifted nor
   invoked to graphic plane 0 or 1, encode_invocation_designation is
   called and the checks are repeated.  The macro uses the variables
   `coding', `dst' and `produced_chars' of the expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single-shift code was just produced; it covers exactly     \
           this one character.  */                                      \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        /* Invoked to graphic plane 0: 7-bit code.  */                  \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        /* Invoked to graphic plane 1: code with the 8th bit set.  */   \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4200
4201
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS set and CHARSET is JISX0208, it is
   replaced by JISX0208-1978.  The checks below are repeated after
   each call to encode_invocation_designation until the two bytes
   have been produced.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    /* A pending single-shift covers this character.  */                \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        else                                                            \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* CHARSET is invoked to graphic plane 0.  */                       \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* CHARSET is invoked to graphic plane 1.  */                       \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* CHARSET is not invoked to any graphic plane yet: invoke it       \
       (designating it to some graphic register first if needed),       \
       then go around the loop again to produce the character.  */      \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
4244
4245
/* Produce codes for character C of CHARSET.  C is first converted to
   a code point of CHARSET by CODING_ENCODE_CHAR.  A charset of
   dimension 1 is handled by ENCODE_ISO_CHARACTER_DIMENSION1 with the
   code point itself; any other charset is handled by
   ENCODE_ISO_CHARACTER_DIMENSION2 with the code point split into
   `code >> 8' and its low 8 bits.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code;                                                              \
                                                                           \
    CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code);       \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
4256
4257
4258 /* Produce designation and invocation codes at a place pointed by DST
4259    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4260    Return new DST.  */
4261
4262 static unsigned char *
4263 encode_invocation_designation (struct charset *charset,
4264                                struct coding_system *coding,
4265                                unsigned char *dst, EMACS_INT *p_nchars)
4266 {
4267   int multibytep = coding->dst_multibyte;
4268   EMACS_INT produced_chars = *p_nchars;
4269   int reg;                      /* graphic register number */
4270   int id = CHARSET_ID (charset);
4271
4272   /* At first, check designations.  */
4273   for (reg = 0; reg < 4; reg++)
4274     if (id == CODING_ISO_DESIGNATION (coding, reg))
4275       break;
4276
4277   if (reg >= 4)
4278     {
4279       /* CHARSET is not yet designated to any graphic registers.  */
4280       /* At first check the requested designation.  */
4281       reg = CODING_ISO_REQUEST (coding, id);
4282       if (reg < 0)
4283         /* Since CHARSET requests no special designation, designate it
4284            to graphic register 0.  */
4285         reg = 0;
4286
4287       ENCODE_DESIGNATION (charset, reg, coding);
4288     }
4289
4290   if (CODING_ISO_INVOCATION (coding, 0) != reg
4291       && CODING_ISO_INVOCATION (coding, 1) != reg)
4292     {
4293       /* Since the graphic register REG is not invoked to any graphic
4294          planes, invoke it to graphic plane 0.  */
4295       switch (reg)
4296         {
4297         case 0:                 /* graphic register 0 */
4298           ENCODE_SHIFT_IN;
4299           break;
4300
4301         case 1:                 /* graphic register 1 */
4302           ENCODE_SHIFT_OUT;
4303           break;
4304
4305         case 2:                 /* graphic register 2 */
4306           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4307             ENCODE_SINGLE_SHIFT_2;
4308           else
4309             ENCODE_LOCKING_SHIFT_2;
4310           break;
4311
4312         case 3:                 /* graphic register 3 */
4313           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4314             ENCODE_SINGLE_SHIFT_3;
4315           else
4316             ENCODE_LOCKING_SHIFT_3;
4317           break;
4318         }
4319     }
4320
4321   *p_nchars = produced_chars;
4322   return dst;
4323 }
4324
4325
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state: a shift-in if graphic plane
   0 does not have register 0 invoked, then one designation for each
   graphic register that has an initial charset (a non-negative
   CODING_ISO_INITIAL) different from its current designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        int initial_id = CODING_ISO_INITIAL (coding, reg);              \
                                                                        \
        if (initial_id < 0                                              \
            || CODING_ISO_DESIGNATION (coding, reg) == initial_id)      \
          continue;                                                     \
        ENCODE_DESIGNATION (CHARSET_FROM_ID (initial_id), reg, coding); \
      }                                                                 \
  } while (0)
4344
4345
4346 /* Produce designation sequences of charsets in the line started from
4347    CHARBUF to a place pointed by DST, and return the number of
4348    produced bytes.  DST should not directly point a buffer text area
4349    which may be relocated by char_charset call.
4350
4351    If the current block ends before any end-of-line, we may fail to
4352    find all the necessary designations.  */
4353
4354 static EMACS_INT
4355 encode_designation_at_bol (struct coding_system *coding,
4356                            int *charbuf, int *charbuf_end,
4357                            unsigned char *dst)
4358 {
4359   unsigned char *orig = dst;
4360   struct charset *charset;
4361   /* Table of charsets to be designated to each graphic register.  */
4362   int r[4];
4363   int c, found = 0, reg;
4364   EMACS_INT produced_chars = 0;
4365   int multibytep = coding->dst_multibyte;
4366   Lisp_Object attrs;
4367   Lisp_Object charset_list;
4368
4369   attrs = CODING_ID_ATTRS (coding->id);
4370   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4371   if (EQ (charset_list, Qiso_2022))
4372     charset_list = Viso_2022_charset_list;
4373
4374   for (reg = 0; reg < 4; reg++)
4375     r[reg] = -1;
4376
4377   while (charbuf < charbuf_end && found < 4)
4378     {
4379       int id;
4380
4381       c = *charbuf++;
4382       if (c == '\n')
4383         break;
4384       charset = char_charset (c, charset_list, NULL);
4385       id = CHARSET_ID (charset);
4386       reg = CODING_ISO_REQUEST (coding, id);
4387       if (reg >= 0 && r[reg] < 0)
4388         {
4389           found++;
4390           r[reg] = id;
4391         }
4392     }
4393
4394   if (found)
4395     {
4396       for (reg = 0; reg < 4; reg++)
4397         if (r[reg] >= 0
4398             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4399           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4400     }
4401
4402   return dst - orig;
4403 }
4404
4405 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4406
4407 static int
4408 encode_coding_iso_2022 (struct coding_system *coding)
4409 {
4410   int multibytep = coding->dst_multibyte;
4411   int *charbuf = coding->charbuf;
4412   int *charbuf_end = charbuf + coding->charbuf_used;
4413   unsigned char *dst = coding->destination + coding->produced;
4414   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4415   int safe_room = 16;
4416   int bol_designation
4417     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4418        && CODING_ISO_BOL (coding));
4419   EMACS_INT produced_chars = 0;
4420   Lisp_Object attrs, eol_type, charset_list;
4421   int ascii_compatible;
4422   int c;
4423   int preferred_charset_id = -1;
4424
4425   CODING_GET_INFO (coding, attrs, charset_list);
4426   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4427   if (VECTORP (eol_type))
4428     eol_type = Qunix;
4429
4430   setup_iso_safe_charsets (attrs);
4431   /* Charset list may have been changed.  */
4432   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4433   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4434
4435   ascii_compatible
4436     = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
4437        && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
4438                                           | CODING_ISO_FLAG_LOCKING_SHIFT)));
4439
4440   while (charbuf < charbuf_end)
4441     {
4442       ASSURE_DESTINATION (safe_room);
4443
4444       if (bol_designation)
4445         {
4446           /* We have to produce designation sequences if any now.  */
4447           unsigned char desig_buf[16];
4448           int nbytes;
4449           EMACS_INT offset;
4450
4451           charset_map_loaded = 0;
4452           nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end,
4453                                               desig_buf);
4454           if (charset_map_loaded
4455               && (offset = coding_set_destination (coding)))
4456             {
4457               dst += offset;
4458               dst_end += offset;
4459             }
4460           memcpy (dst, desig_buf, nbytes);
4461           dst += nbytes;
4462           /* We are sure that designation sequences are all ASCII bytes.  */
4463           produced_chars += nbytes;
4464           bol_designation = 0;
4465           ASSURE_DESTINATION (safe_room);
4466         }
4467
4468       c = *charbuf++;
4469
4470       if (c < 0)
4471         {
4472           /* Handle an annotation.  */
4473           switch (*charbuf)
4474             {
4475             case CODING_ANNOTATE_COMPOSITION_MASK:
4476               /* Not yet implemented.  */
4477               break;
4478             case CODING_ANNOTATE_CHARSET_MASK:
4479               preferred_charset_id = charbuf[2];
4480               if (preferred_charset_id >= 0
4481                   && NILP (Fmemq (make_number (preferred_charset_id),
4482                                   charset_list)))
4483                 preferred_charset_id = -1;
4484               break;
4485             default:
4486               abort ();
4487             }
4488           charbuf += -c - 1;
4489           continue;
4490         }
4491
4492       /* Now encode the character C.  */
4493       if (c < 0x20 || c == 0x7F)
4494         {
4495           if (c == '\n'
4496               || (c == '\r' && EQ (eol_type, Qmac)))
4497             {
4498               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4499                 ENCODE_RESET_PLANE_AND_REGISTER ();
4500               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4501                 {
4502                   int i;
4503
4504                   for (i = 0; i < 4; i++)
4505                     CODING_ISO_DESIGNATION (coding, i)
4506                       = CODING_ISO_INITIAL (coding, i);
4507                 }
4508               bol_designation
4509                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4510             }
4511           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4512             ENCODE_RESET_PLANE_AND_REGISTER ();
4513           EMIT_ONE_ASCII_BYTE (c);
4514         }
4515       else if (ASCII_CHAR_P (c))
4516         {
4517           if (ascii_compatible)
4518             EMIT_ONE_ASCII_BYTE (c);
4519           else
4520             {
4521               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4522               ENCODE_ISO_CHARACTER (charset, c);
4523             }
4524         }
4525       else if (CHAR_BYTE8_P (c))
4526         {
4527           c = CHAR_TO_BYTE8 (c);
4528           EMIT_ONE_BYTE (c);
4529         }
4530       else
4531         {
4532           struct charset *charset;
4533
4534           if (preferred_charset_id >= 0)
4535             {
4536               int result;
4537
4538               charset = CHARSET_FROM_ID (preferred_charset_id);
4539               CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result);
4540               if (! result)
4541                 CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4542                                      NULL, charset);
4543             }
4544           else
4545             CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4546                                  NULL, charset);
4547           if (!charset)
4548             {
4549               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4550                 {
4551                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4552                   charset = CHARSET_FROM_ID (charset_ascii);
4553                 }
4554               else
4555                 {
4556                   c = coding->default_char;
4557                   CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4558                                        charset_list, NULL, charset);
4559                 }
4560             }
4561           ENCODE_ISO_CHARACTER (charset, c);
4562         }
4563     }
4564
4565   if (coding->mode & CODING_MODE_LAST_BLOCK
4566       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4567     {
4568       ASSURE_DESTINATION (safe_room);
4569       ENCODE_RESET_PLANE_AND_REGISTER ();
4570     }
4571   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4572   CODING_ISO_BOL (coding) = bol_designation;
4573   coding->produced_char += produced_chars;
4574   coding->produced = dst - coding->destination;
4575   return 0;
4576 }
4577
4578 \f
4579 /*** 8. SJIS and BIG5 handlers ***/
4580
4581 /* Although SJIS and BIG5 are not ISO coding systems, they are used
4582    quite widely.  So, for the moment, Emacs supports them directly in
4583    C code.  But, in the future, they may be supported only by CCL.  */
4584
4585 /* SJIS is a coding system encoding three character sets: ASCII, the
4586    right half of JISX0201-Kana, and JISX0208.  An ASCII character is
4587    encoded as is.  A character of charset katakana-jisx0201 is encoded
4588    as "position-code + 0x80".  A character of charset japanese-jisx0208
4589    is encoded in two bytes, with its two position-codes divided and
4590    shifted so that they fit in the ranges below.
4591
4592    --- CODE RANGE of SJIS ---
4593    (character set)      (range)
4594    ASCII                0x00 .. 0x7F
4595    KATAKANA-JISX0201    0xA0 .. 0xDF
4596    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4597             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4598    -------------------------------
4599
4600 */
4601
4602 /* BIG5 is a coding system encoding two character sets: ASCII and
4603    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4604    character set; each of its characters is encoded in two bytes.
4605
4606    --- CODE RANGE of BIG5 ---
4607    (character set)      (range)
4608    ASCII                0x00 .. 0x7F
4609    Big5 (1st byte)      0xA1 .. 0xFE
4610         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4611    --------------------------
4612
4613   */
4614
4615 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4616    Check if a text is encoded in SJIS.  Return 1 when the scan leaves
4617    through the `no_more_source' exit without finding a truncated
        character in the last block; in that case CATEGORY_MASK_SJIS is
        OR-ed into DETECT_INFO->found if any SJIS-specific byte was seen.
        Otherwise set CATEGORY_MASK_SJIS in DETECT_INFO->rejected and
        return 0.  */
4618
4619 static int
4620 detect_coding_sjis (struct coding_system *coding,
4621                     struct coding_detection_info *detect_info)
4622 {
       /* src_end, multibytep and consumed_chars are not referenced
          directly below; presumably ONE_MORE_BYTE (defined elsewhere)
          uses them -- confirm against the macro.  */
4623   const unsigned char *src = coding->source, *src_base;
4624   const unsigned char *src_end = coding->source + coding->src_bytes;
4625   int multibytep = coding->src_multibyte;
4626   EMACS_INT consumed_chars = 0;
4627   int found = 0;
4628   int c;
4629   Lisp_Object attrs, charset_list;
4630   int max_first_byte_of_2_byte_code;
4631
4632   CODING_GET_INFO (coding, attrs, charset_list);
       /* A charset list with more than three elements widens the upper
          range of accepted lead bytes from 0xE0..0xEF to 0xE0..0xFC.  */
4633   max_first_byte_of_2_byte_code
4634     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4635
4636   detect_info->checked |= CATEGORY_MASK_SJIS;
4637   /* A coding system of this category is always ASCII compatible.  */
4638   src += coding->head_ascii;
4639
4640   while (1)
4641     {
           /* Remember where this character starts; the exit code below
              compares it with SRC to spot a truncated character.  */
4642       src_base = src;
4643       ONE_MORE_BYTE (c);
           /* Values below 0x80 are neither evidence for nor against SJIS.  */
4644       if (c < 0x80)
4645         continue;
4646       if ((c >= 0x81 && c <= 0x9F)
4647           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4648         {
               /* Lead byte of a two-byte code: validate the trail byte.  */
4649           ONE_MORE_BYTE (c);
4650           if (c < 0x40 || c == 0x7F || c > 0xFC)
4651             break;
4652           found = CATEGORY_MASK_SJIS;
4653         }
           /* Single-byte range.  NOTE(review): 0xA0 is accepted here,
              whereas decode_coding_sjis treats 0xA0 as invalid.  */
4654       else if (c >= 0xA0 && c < 0xE0)
4655         found = CATEGORY_MASK_SJIS;
4656       else
4657         break;
4658     }
       /* Reached only through `break': an invalid byte was met.  */
4659   detect_info->rejected |= CATEGORY_MASK_SJIS;
4660   return 0;
4661
4662  no_more_source:
       /* This label is not targeted by any visible goto; presumably
          ONE_MORE_BYTE jumps here when the source is exhausted.
          SRC_BASE < SRC then means the source ended in the middle of a
          character, which is an error if this is the last block.  */
4663   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4664     {
4665       detect_info->rejected |= CATEGORY_MASK_SJIS;
4666       return 0;
4667     }
4668   detect_info->found |= found;
4669   return 1;
4670 }
4671
4672 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4673    Check if a text is encoded in BIG5.  Return 1 when the scan leaves
4674    through the `no_more_source' exit without finding a truncated
        character in the last block; in that case CATEGORY_MASK_BIG5 is
        OR-ed into DETECT_INFO->found if any two-byte sequence was seen.
        Otherwise set CATEGORY_MASK_BIG5 in DETECT_INFO->rejected and
        return 0.  */
4675
4676 static int
4677 detect_coding_big5 (struct coding_system *coding,
4678                     struct coding_detection_info *detect_info)
4679 {
       /* src_end, multibytep and consumed_chars are not referenced
          directly below; presumably ONE_MORE_BYTE (defined elsewhere)
          uses them -- confirm against the macro.  */
4680   const unsigned char *src = coding->source, *src_base;
4681   const unsigned char *src_end = coding->source + coding->src_bytes;
4682   int multibytep = coding->src_multibyte;
4683   EMACS_INT consumed_chars = 0;
4684   int found = 0;
4685   int c;
4686
4687   detect_info->checked |= CATEGORY_MASK_BIG5;
4688   /* A coding system of this category is always ASCII compatible.  */
4689   src += coding->head_ascii;
4690
4691   while (1)
4692     {
           /* Remember where this character starts; the exit code below
              compares it with SRC to spot a truncated character.  */
4693       src_base = src;
4694       ONE_MORE_BYTE (c);
           /* Values below 0x80 are neither evidence for nor against BIG5.  */
4695       if (c < 0x80)
4696         continue;
4697       if (c >= 0xA1)
4698         {
4699           ONE_MORE_BYTE (c);
               /* An invalid trail byte disqualifies BIG5.  Leave the loop
                  so that the category is recorded as rejected below, as
                  is done for an invalid lead byte.  Returning directly
                  here would leave BIG5 un-rejected for invalid text.  */
4700           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4701             break;
4702           found = CATEGORY_MASK_BIG5;
4703         }
4704       else
4705         break;
4706     }
       /* Reached only through `break': an invalid byte was met.  */
4707   detect_info->rejected |= CATEGORY_MASK_BIG5;
4708   return 0;
4709
4710  no_more_source:
       /* This label is not targeted by any visible goto; presumably
          ONE_MORE_BYTE jumps here when the source is exhausted.
          SRC_BASE < SRC then means the source ended in the middle of a
          character, which is an error if this is the last block.  */
4711   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4712     {
4713       detect_info->rejected |= CATEGORY_MASK_BIG5;
4714       return 0;
4715     }
4716   detect_info->found |= found;
4717   return 1;
4718 }
4719
4720 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4721    Decode SJIS text from CODING->source into CODING->charbuf.  BIG5
        text is handled separately by decode_coding_big5.  */
4722
4723 static void
4724 decode_coding_sjis (struct coding_system *coding)
4725 {
4726   const unsigned char *src = coding->source + coding->consumed;
4727   const unsigned char *src_end = coding->source + coding->src_bytes;
4728   const unsigned char *src_base;
4729   int *charbuf = coding->charbuf + coding->charbuf_used;
4730   /* We may produce one charset annotation in one loop and one more at
4731      the end.  */
4732   int *charbuf_end
4733     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4734   EMACS_INT consumed_chars = 0, consumed_chars_base;
4735   int multibytep = coding->src_multibyte;
4736   struct charset *charset_roman, *charset_kanji, *charset_kana;
4737   struct charset *charset_kanji2;
4738   Lisp_Object attrs, charset_list, val;
       /* char_offset counts decoded characters; last_offset and last_id
          describe the current run of characters sharing one charset.  */
4739   EMACS_INT char_offset = coding->produced_char;
4740   EMACS_INT last_offset = char_offset;
4741   int last_id = charset_ascii;
4742   int eol_dos =
4743     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Holds the byte read ahead after a CR (DOS line ends only), or -1.  */
4744   int byte_after_cr = -1;
4745
4746   CODING_GET_INFO (coding, attrs, charset_list);
4747
       /* The charset list supplies, in order: the roman charset, the kana
          charset, the kanji charset and an optional second kanji
          charset.  */
4748   val = charset_list;
4749   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4750   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4751   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4752   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4753
4754   while (1)
4755     {
4756       int c, c1;
4757       struct charset *charset;
4758
           /* Remember where this character starts so that the error and
              exhaustion paths can rewind to it.  */
4759       src_base = src;
4760       consumed_chars_base = consumed_chars;
4761
4762       if (charbuf >= charbuf_end)
4763         {
               /* No room left in charbuf.  A byte already read ahead after
                  a CR must not be reported as consumed.  */
4764           if (byte_after_cr >= 0)
4765             src_base--;
4766           break;
4767         }
4768
4769       if (byte_after_cr >= 0)
4770         c = byte_after_cr, byte_after_cr = -1;
4771       else
4772         ONE_MORE_BYTE (c);
4773       if (c < 0)
4774         goto invalid_code;
4775       if (c < 0x80)
4776         {
               /* With DOS line ends, read the byte following a CR now and
                  hand it to the next iteration via byte_after_cr.  */
4777           if (eol_dos && c == '\r')
4778             ONE_MORE_BYTE (byte_after_cr);
4779           charset = charset_roman;
4780         }
4781       else if (c == 0x80 || c == 0xA0)
4782         goto invalid_code;
4783       else if (c >= 0xA1 && c <= 0xDF)
4784         {
4785           /* SJIS -> JISX0201-Kana */
4786           c &= 0x7F;
4787           charset = charset_kana;
4788         }
           /* Here C is in 0x81..0x9F or 0xE0..0xEF.  */
4789       else if (c <= 0xEF)
4790         {
4791           /* SJIS -> JISX0208 */
4792           ONE_MORE_BYTE (c1);
4793           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4794             goto invalid_code;
4795           c = (c << 8) | c1;
4796           SJIS_TO_JIS (c);
4797           charset = charset_kanji;
4798         }
           /* Lead bytes 0xF0..0xFC are valid only when a second kanji
              charset is configured.  */
4799       else if (c <= 0xFC && charset_kanji2)
4800         {
4801           /* SJIS -> JISX0213-2 */
4802           ONE_MORE_BYTE (c1);
4803           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4804             goto invalid_code;
4805           c = (c << 8) | c1;
4806           SJIS_TO_JIS2 (c);
4807           charset = charset_kanji2;
4808         }
4809       else
4810         goto invalid_code;
           /* On a switch to a different non-ASCII charset, record the
              previous run (unless it was ASCII) and start a new one.  */
4811       if (charset->id != charset_ascii
4812           && last_id != charset->id)
4813         {
4814           if (last_id != charset_ascii)
4815             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4816           last_id = charset->id;
4817           last_offset = char_offset;
4818         }
4819       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4820       *charbuf++ = c;
4821       char_offset++;
4822       continue;
4823
4824     invalid_code:
           /* Rewind to the start of the offending character and pass one
              byte through: a negative value from ONE_MORE_BYTE is negated,
              any other byte goes through BYTE8_TO_CHAR.  */
4825       src = src_base;
4826       consumed_chars = consumed_chars_base;
4827       ONE_MORE_BYTE (c);
4828       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4829       char_offset++;
4830       coding->errors++;
4831     }
4832
4833  no_more_source:
       /* Reached by `break' above and, presumably, by ONE_MORE_BYTE when
          the source runs out.  Consumption is reported only up to the
          start of the last, possibly incomplete, character.  */
4834   if (last_id != charset_ascii)
4835     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4836   coding->consumed_char += consumed_chars_base;
4837   coding->consumed = src_base - coding->source;
4838   coding->charbuf_used = charbuf - coding->charbuf;
4839 }
4840
4841 static void
4842 decode_coding_big5 (struct coding_system *coding)
4843 {
       /* Decode BIG5 text from CODING->source into CODING->charbuf,
          starting at CODING->consumed and appending after
          CODING->charbuf_used.  On return CODING->consumed,
          CODING->consumed_char and CODING->charbuf_used are updated.  */
4844   const unsigned char *src = coding->source + coding->consumed;
4845   const unsigned char *src_end = coding->source + coding->src_bytes;
4846   const unsigned char *src_base;
4847   int *charbuf = coding->charbuf + coding->charbuf_used;
4848   /* We may produce one charset annotation in one loop and one more at
4849      the end.  */
4850   int *charbuf_end
4851     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4852   EMACS_INT consumed_chars = 0, consumed_chars_base;
4853   int multibytep = coding->src_multibyte;
4854   struct charset *charset_roman, *charset_big5;
4855   Lisp_Object attrs, charset_list, val;
       /* char_offset counts decoded characters; last_offset and last_id
          describe the current run of characters sharing one charset.  */
4856   EMACS_INT char_offset = coding->produced_char;
4857   EMACS_INT last_offset = char_offset;
4858   int last_id = charset_ascii;
4859   int eol_dos =
4860     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Holds the byte read ahead after a CR (DOS line ends only), or -1.  */
4861   int byte_after_cr = -1;
4862
4863   CODING_GET_INFO (coding, attrs, charset_list);
       /* The first two elements of the charset list are the roman charset
          and the Big5 charset.  */
4864   val = charset_list;
4865   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4866   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4867
4868   while (1)
4869     {
4870       int c, c1;
4871       struct charset *charset;
4872
           /* Remember where this character starts so that the error and
              exhaustion paths can rewind to it.  */
4873       src_base = src;
4874       consumed_chars_base = consumed_chars;
4875
4876       if (charbuf >= charbuf_end)
4877         {
               /* No room left in charbuf.  A byte already read ahead after
                  a CR must not be reported as consumed.  */
4878           if (byte_after_cr >= 0)
4879             src_base--;
4880           break;
4881         }
4882
4883       if (byte_after_cr >= 0)
4884         c = byte_after_cr, byte_after_cr = -1;
4885       else
4886         ONE_MORE_BYTE (c);
4887
4888       if (c < 0)
4889         goto invalid_code;
4890       if (c < 0x80)
4891         {
               /* With DOS line ends, read the byte following a CR now and
                  hand it to the next iteration via byte_after_cr.  */
4892           if (eol_dos && c == '\r')
4893             ONE_MORE_BYTE (byte_after_cr);
4894           charset = charset_roman;
4895         }
4896       else
4897         {
4898           /* BIG5 -> Big5 */
               /* Lead byte must be 0xA1..0xFE; trail byte must be
                  0x40..0x7E or 0xA1..0xFE.  */
4899           if (c < 0xA1 || c > 0xFE)
4900             goto invalid_code;
4901           ONE_MORE_BYTE (c1);
4902           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4903             goto invalid_code;
4904           c = c << 8 | c1;
4905           charset = charset_big5;
4906         }
           /* On a switch to a different non-ASCII charset, record the
              previous run (unless it was ASCII) and start a new one.  */
4907       if (charset->id != charset_ascii
4908           && last_id != charset->id)
4909         {
4910           if (last_id != charset_ascii)
4911             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4912           last_id = charset->id;
4913           last_offset = char_offset;
4914         }
4915       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4916       *charbuf++ = c;
4917       char_offset++;
4918       continue;
4919
4920     invalid_code:
           /* Rewind to the start of the offending character and pass one
              byte through: a negative value from ONE_MORE_BYTE is negated,
              any other byte goes through BYTE8_TO_CHAR.  */
4921       src = src_base;
4922       consumed_chars = consumed_chars_base;
4923       ONE_MORE_BYTE (c);
4924       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4925       char_offset++;
4926       coding->errors++;
4927     }
4928
4929  no_more_source:
       /* Reached by `break' above and, presumably, by ONE_MORE_BYTE when
          the source runs out.  Consumption is reported only up to the
          start of the last, possibly incomplete, character.  */
4930   if (last_id != charset_ascii)
4931     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4932   coding->consumed_char += consumed_chars_base;
4933   coding->consumed = src_base - coding->source;
4934   coding->charbuf_used = charbuf - coding->charbuf;
4935 }
4936
4937 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4938    This function can encode charsets `ascii', `katakana-jisx0201',
4939    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4940    are sure that all these charsets are registered as official charset
4941    (i.e. do not have extended leading-codes).  Characters of other
4942    charsets are produced without any encoding.  This function encodes
4943    SJIS text; BIG5 text is encoded by encode_coding_big5.  */
4944
4945 static int
4946 encode_coding_sjis (struct coding_system *coding)
4947 {
4948   int multibytep = coding->dst_multibyte;
4949   int *charbuf = coding->charbuf;
4950   int *charbuf_end = charbuf + coding->charbuf_used;
4951   unsigned char *dst = coding->destination + coding->produced;
4952   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Amount of room requested from ASSURE_DESTINATION before each
          character is encoded.  */
4953   int safe_room = 4;
4954   EMACS_INT produced_chars = 0;
4955   Lisp_Object attrs, charset_list, val;
4956   int ascii_compatible;
4957   struct charset *charset_kanji, *charset_kana;
4958   struct charset *charset_kanji2;
4959   int c;
4960
4961   CODING_GET_INFO (coding, attrs, charset_list);
       /* Skip the first element of the charset list; the following ones
          are the kana charset, the kanji charset and an optional second
          kanji charset.  */
4962   val = XCDR (charset_list);
4963   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4964   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4965   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4966
4967   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4968
4969   while (charbuf < charbuf_end)
4970     {
4971       ASSURE_DESTINATION (safe_room);
4972       c = *charbuf++;
4973       /* Now encode the character C.  */
4974       if (ASCII_CHAR_P (c) && ascii_compatible)
4975         EMIT_ONE_ASCII_BYTE (c);
4976       else if (CHAR_BYTE8_P (c))
4977         {
               /* A raw 8-bit character is written back as its byte.  */
4978           c = CHAR_TO_BYTE8 (c);
4979           EMIT_ONE_BYTE (c);
4980         }
4981       else
4982         {
4983           unsigned code;
4984           struct charset *charset;
4985           CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
4986                                &code, charset);
4987
4988           if (!charset)
4989             {
                   /* No charset was found for C.  In safe-encoding mode use
                      CODING_INHIBIT_CHARACTER_SUBSTITUTION as the code in
                      the ASCII charset; otherwise look up the coding
                      system's default character instead.  */
4990               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4991                 {
4992                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4993                   charset = CHARSET_FROM_ID (charset_ascii);
4994                 }
4995               else
4996                 {
4997                   c = coding->default_char;
4998                   CODING_CHAR_CHARSET (coding, dst, dst_end, c,
4999                                        charset_list, &code, charset);
5000                 }
5001             }
5002           if (code == CHARSET_INVALID_CODE (charset))
5003             abort ();
5004           if (charset == charset_kanji)
5005             {
                   /* Convert the JIS code to SJIS and emit it as two
                      bytes, high byte first.  */
5006               int c1, c2;
5007               JIS_TO_SJIS (code);
5008               c1 = code >> 8, c2 = code & 0xFF;
5009               EMIT_TWO_BYTES (c1, c2);
5010             }
5011           else if (charset == charset_kana)
5012             EMIT_ONE_BYTE (code | 0x80);
5013           else if (charset_kanji2 && charset == charset_kanji2)
5014             {
5015               int c1, c2;
5016
                   /* Only codes whose high byte is 0x21, 0x23..0x25, 0x28,
                      0x2C..0x2F or >= 0x6E are converted with JIS_TO_SJIS2;
                      for any other code only the low seven bits are
                      emitted, as one byte.  */
5017               c1 = code >> 8;
5018               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5019                   || c1 == 0x28
5020                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5021                 {
5022                   JIS_TO_SJIS2 (code);
5023                   c1 = code >> 8, c2 = code & 0xFF;
5024                   EMIT_TWO_BYTES (c1, c2);
5025                 }
5026               else
5027                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5028             }
5029           else
                 /* Any other charset: emit the low seven bits of the code.  */
5030             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5031         }
5032     }
5033   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5034   coding->produced_char += produced_chars;
5035   coding->produced = dst - coding->destination;
5036   return 0;
5037 }
5038
5039 static int
5040 encode_coding_big5 (struct coding_system *coding)
5041 {
       /* Encode the characters in CODING->charbuf as BIG5 text, appending
          to CODING->destination after CODING->produced.  Records
          CODING_RESULT_SUCCESS, updates CODING->produced and
          CODING->produced_char, and returns 0.  */
5042   int multibytep = coding->dst_multibyte;
5043   int *charbuf = coding->charbuf;
5044   int *charbuf_end = charbuf + coding->charbuf_used;
5045   unsigned char *dst = coding->destination + coding->produced;
5046   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Amount of room requested from ASSURE_DESTINATION before each
          character is encoded.  */
5047   int safe_room = 4;
5048   EMACS_INT produced_chars = 0;
5049   Lisp_Object attrs, charset_list, val;
5050   int ascii_compatible;
5051   struct charset *charset_big5;
5052   int c;
5053
5054   CODING_GET_INFO (coding, attrs, charset_list);
       /* The Big5 charset is the second element of the charset list.  */
5055   val = XCDR (charset_list);
5056   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5057   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5058
5059   while (charbuf < charbuf_end)
5060     {
5061       ASSURE_DESTINATION (safe_room);
5062       c = *charbuf++;
5063       /* Now encode the character C.  */
5064       if (ASCII_CHAR_P (c) && ascii_compatible)
5065         EMIT_ONE_ASCII_BYTE (c);
5066       else if (CHAR_BYTE8_P (c))
5067         {
               /* A raw 8-bit character is written back as its byte.  */
5068           c = CHAR_TO_BYTE8 (c);
5069           EMIT_ONE_BYTE (c);
5070         }
5071       else
5072         {
5073           unsigned code;
5074           struct charset *charset;
5075           CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5076                                &code, charset);
5077
5078           if (! charset)
5079             {
                   /* No charset was found for C.  In safe-encoding mode use
                      CODING_INHIBIT_CHARACTER_SUBSTITUTION as the code in
                      the ASCII charset; otherwise look up the coding
                      system's default character instead.  */
5080               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5081                 {
5082                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5083                   charset = CHARSET_FROM_ID (charset_ascii);
5084                 }
5085               else
5086                 {
5087                   c = coding->default_char;
5088                   CODING_CHAR_CHARSET (coding, dst, dst_end, c,
5089                                        charset_list, &code, charset);
5090                 }
5091             }
5092           if (code == CHARSET_INVALID_CODE (charset))
5093             abort ();
5094           if (charset == charset_big5)
5095             {
                   /* Emit the 16-bit code as two bytes, high byte first.  */
5096               int c1, c2;
5097
5098               c1 = code >> 8, c2 = code & 0xFF;
5099               EMIT_TWO_BYTES (c1, c2);
5100             }
5101           else
                 /* Any other charset: emit the low seven bits of the code.  */
5102             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5103         }
5104     }
5105   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5106   coding->produced_char += produced_chars;
5107   coding->produced = dst - coding->destination;
5108   return 0;
5109 }
5110
5111 \f
5112 /*** 9. CCL handlers ***/
5113
5114 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5115    Check if a text is encoded in a coding system of which
5116    encoder/decoder are written in CCL program.  Return 1 when the
5117    scan leaves through the `no_more_source' exit, after OR-ing
        CATEGORY_MASK_CCL into DETECT_INFO->found if a strongly
        suggestive byte was seen.  Otherwise set CATEGORY_MASK_CCL in
        DETECT_INFO->rejected and return 0.  */
5118
5119 static int
5120 detect_coding_ccl (struct coding_system *coding,
5121                    struct coding_detection_info *detect_info)
5122 {
5123   const unsigned char *src = coding->source, *src_base;
5124   const unsigned char *src_end = coding->source + coding->src_bytes;
5125   int multibytep = coding->src_multibyte;
5126   EMACS_INT consumed_chars = 0;
5127   int found = 0;
5128   unsigned char *valids;
       /* Taken from the caller's CODING before CODING is redirected
          below.  */
5129   EMACS_INT head_ascii = coding->head_ascii;
5130   Lisp_Object attrs;
5131
5132   detect_info->checked |= CATEGORY_MASK_CCL;
5133
       /* From here on CODING refers to the coding system registered for
          the CCL category, not to the argument passed in.  */
5134   coding = &coding_categories[coding_category_ccl];
5135   valids = CODING_CCL_VALIDS (coding);
5136   attrs = CODING_ID_ATTRS (coding->id);
       /* The leading ASCII part is skipped only for an ASCII-compatible
          coding system.  */
5137   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5138     src += head_ascii;
5139
5140   while (1)
5141     {
5142       int c;
5143
5144       src_base = src;
5145       ONE_MORE_BYTE (c);
           /* VALIDS is indexed by byte value: 0 rejects the byte, a value
              greater than 1 counts as evidence for this category.  */
5146       if (c < 0 || ! valids[c])
5147         break;
5148       if ((valids[c] > 1))
5149         found = CATEGORY_MASK_CCL;
5150     }
       /* Reached only through `break': an invalid byte was met.  */
5151   detect_info->rejected |= CATEGORY_MASK_CCL;
5152   return 0;
5153
5154  no_more_source:
       /* This label is not targeted by any visible goto; presumably
          ONE_MORE_BYTE jumps here when the source is exhausted.  */
5155   detect_info->found |= found;
5156   return 1;
5157 }
5158
5159 static void
5160 decode_coding_ccl (struct coding_system *coding)
5161 {
       /* Decode CODING->source into CODING->charbuf by running the CCL
          program of CODING over the source in chunks of at most 1024
          elements.  The CCL status is translated into a conversion
          result, and CODING->consumed, CODING->consumed_char and
          CODING->charbuf_used are updated.  */
5162   const unsigned char *src = coding->source + coding->consumed;
5163   const unsigned char *src_end = coding->source + coding->src_bytes;
5164   int *charbuf = coding->charbuf + coding->charbuf_used;
5165   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5166   EMACS_INT consumed_chars = 0;
5167   int multibytep = coding->src_multibyte;
5168   struct ccl_program *ccl = &coding->spec.ccl->ccl;
       /* One chunk of input for ccl_driver.  For multibyte sources
          source_byteidx[K] is the byte offset from SRC of the Kth
          element, with one extra slot for the offset just past the last
          element.  */
5169   int source_charbuf[1024];
5170   int source_byteidx[1025];
5171   Lisp_Object attrs, charset_list;
5172
5173   CODING_GET_INFO (coding, attrs, charset_list);
5174
5175   while (1)
5176     {
5177       const unsigned char *p = src;
5178       int i = 0;
5179
           /* Fill the chunk: characters for a multibyte source, raw bytes
              otherwise.  */
5180       if (multibytep)
5181         {
5182           while (i < 1024 && p < src_end)
5183             {
5184               source_byteidx[i] = p - src;
5185               source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5186             }
5187           source_byteidx[i] = p - src;
5188         }
5189       else
5190         while (i < 1024 && p < src_end)
5191           source_charbuf[i++] = *p++;
5192
           /* Tell the CCL program when this chunk reaches the end of the
              final block.  */
5193       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5194         ccl->last_block = 1;
5195       ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5196                   charset_list);
5197       charbuf += ccl->produced;
           /* ccl->consumed counts chunk elements; convert it to a byte
              offset when the source is multibyte.  */
5198       if (multibytep)
5199         src += source_byteidx[ccl->consumed];
5200       else
5201         src += ccl->consumed;
5202       consumed_chars += ccl->consumed;
           /* Continue only when the program stopped for lack of input and
              more source remains beyond this chunk.  */
5203       if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5204         break;
5205     }
5206
5207   switch (ccl->status)
5208     {
5209     case CCL_STAT_SUSPEND_BY_SRC:
5210       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5211       break;
5212     case CCL_STAT_SUSPEND_BY_DST:
5213       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5214       break;
5215     case CCL_STAT_QUIT:
5216     case CCL_STAT_INVALID_CMD:
5217       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5218       break;
5219     default:
5220       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5221       break;
5222     }
5223   coding->consumed_char += consumed_chars;
5224   coding->consumed = src - coding->source;
5225   coding->charbuf_used = charbuf - coding->charbuf;
5226 }
5227
5228 static int
5229 encode_coding_ccl (struct coding_system *coding)
5230 {
       /* Encode the characters in CODING->charbuf by running the CCL
          program of CODING, at most 1024 output elements per call, and
          append the low eight bits of each element to
          CODING->destination.  The CCL status is translated into a
          conversion result; always returns 0.  */
5231   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5232   int multibytep = coding->dst_multibyte;
5233   int *charbuf = coding->charbuf;
5234   int *charbuf_end = charbuf + coding->charbuf_used;
5235   unsigned char *dst = coding->destination + coding->produced;
5236   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5237   int destination_charbuf[1024];
5238   EMACS_INT produced_chars = 0;
5239   int i;
5240   Lisp_Object attrs, charset_list;
5241
5242   CODING_GET_INFO (coding, attrs, charset_list);
       /* Tell the CCL program when this is the final piece of the final
          block.  */
5243   if (coding->consumed_char == coding->src_chars
5244       && coding->mode & CODING_MODE_LAST_BLOCK)
5245     ccl->last_block = 1;
5246
5247   do
5248     {
5249       ccl_driver (ccl, charbuf, destination_charbuf,
5250                   charbuf_end - charbuf, 1024, charset_list);
5251       if (multibytep)
5252         {
               /* Twice the produced count is reserved; presumably a byte
                  can take up to two bytes in a multibyte destination --
                  confirm against EMIT_ONE_BYTE.  produced_chars is not
                  updated here, so that macro presumably does it.  */
5253           ASSURE_DESTINATION (ccl->produced * 2);
5254           for (i = 0; i < ccl->produced; i++)
5255             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5256         }
5257       else
5258         {
5259           ASSURE_DESTINATION (ccl->produced);
5260           for (i = 0; i < ccl->produced; i++)
5261             *dst++ = destination_charbuf[i] & 0xFF;
5262           produced_chars += ccl->produced;
5263         }
5264       charbuf += ccl->consumed;
           /* A quit or an invalid command ends the encoding at once.  */
5265       if (ccl->status == CCL_STAT_QUIT
5266           || ccl->status == CCL_STAT_INVALID_CMD)
5267         break;
5268     }
5269   while (charbuf < charbuf_end);
5270
5271   switch (ccl->status)
5272     {
5273     case CCL_STAT_SUSPEND_BY_SRC:
5274       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5275       break;
5276     case CCL_STAT_SUSPEND_BY_DST:
5277       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5278       break;
5279     case CCL_STAT_QUIT:
5280     case CCL_STAT_INVALID_CMD:
5281       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5282       break;
5283     default:
5284       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5285       break;
5286     }
5287
5288   coding->produced_char += produced_chars;
5289   coding->produced = dst - coding->destination;
5290   return 0;
5291 }
5292
5293
5294 \f
5295 /*** 10, 11. no-conversion handlers ***/
5296
5297 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Raw text needs no conversion: mark the characters as being at
        the source and report the whole source as consumed.  The one
        exception is a trailing CR when DOS line ends are being decoded;
        it is left unconsumed and CODING_RESULT_INSUFFICIENT_SRC is
        recorded.  */
5298
5299 static void
5300 decode_coding_raw_text (struct coding_system *coding)
5301 {
5302   int eol_dos =
5303     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5304
5305   coding->chars_at_source = 1;
5306   coding->consumed_char = coding->src_chars;
5307   coding->consumed = coding->src_bytes;
       /* Look at the last source byte only if there is one; with an
          empty source, indexing with src_bytes - 1 would read before
          the start of the buffer.  */
5308   if (eol_dos
           && coding->src_bytes > 0
           && coding->source[coding->src_bytes - 1] == '\r')
5309     {
5310       coding->consumed_char--;
5311       coding->consumed--;
5312       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5313     }
5314   else
5315     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5316 }
5317
5318 static int
5319 encode_coding_raw_text (struct coding_system *coding)
5320 {
       /* Copy the elements of CODING->charbuf to CODING->destination
          without charset conversion.  The four combinations of multibyte
          or unibyte source and destination are handled separately below.
          Records CODING_RESULT_SUCCESS, updates CODING->produced and
          CODING->produced_char, and returns 0.  */
5321   int multibytep = coding->dst_multibyte;
5322   int *charbuf = coding->charbuf;
5323   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5324   unsigned char *dst = coding->destination + coding->produced;
5325   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5326   EMACS_INT produced_chars = 0;
5327   int c;
5328
5329   if (multibytep)
5330     {
           /* Multibyte destination.  produced_chars is not updated
              directly in this branch; presumably the EMIT_* macros do
              it -- confirm against their definitions.  */
5331       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5332
5333       if (coding->src_multibyte)
5334         while (charbuf < charbuf_end)
5335           {
5336             ASSURE_DESTINATION (safe_room);
5337             c = *charbuf++;
5338             if (ASCII_CHAR_P (c))
5339               EMIT_ONE_ASCII_BYTE (c);
5340             else if (CHAR_BYTE8_P (c))
5341               {
5342                 c = CHAR_TO_BYTE8 (c);
5343                 EMIT_ONE_BYTE (c);
5344               }
5345             else
5346               {
                     /* Any other character: build its multibyte form in
                        STR, then emit that form one byte at a time.  */
5347                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5348
5349                 CHAR_STRING_ADVANCE (c, p1);
5350                 do
5351                   {
5352                     EMIT_ONE_BYTE (*p0);
5353                     p0++;
5354                   }
5355                 while (p0 < p1);
5356               }
5357           }
5358       else
             /* Unibyte source: every element is emitted as one byte.  */
5359         while (charbuf < charbuf_end)
5360           {
5361             ASSURE_DESTINATION (safe_room);
5362             c = *charbuf++;
5363             EMIT_ONE_BYTE (c);
5364           }
5365     }
5366   else
5367     {
           /* Unibyte destination: bytes are stored directly through DST.  */
5368       if (coding->src_multibyte)
5369         {
5370           int safe_room = MAX_MULTIBYTE_LENGTH;
5371
5372           while (charbuf < charbuf_end)
5373             {
5374               ASSURE_DESTINATION (safe_room);
5375               c = *charbuf++;
5376               if (ASCII_CHAR_P (c))
5377                 *dst++ = c;
5378               else if (CHAR_BYTE8_P (c))
5379                 *dst++ = CHAR_TO_BYTE8 (c);
5380               else
5381                 CHAR_STRING_ADVANCE (c, dst);
5382             }
5383         }
5384       else
5385         {
               /* Both sides unibyte: reserve room for everything at once
                  and copy; the copy is additionally bounded by DST_END.  */
5386           ASSURE_DESTINATION (charbuf_end - charbuf);
5387           while (charbuf < charbuf_end && dst < dst_end)
5388             *dst++ = *charbuf++;
5389         }
           /* Here the count is the number of bytes stored by this call.  */
5390       produced_chars = dst - (coding->destination + coding->produced);
5391     }
5392   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5393   coding->produced_char += produced_chars;
5394   coding->produced = dst - coding->destination;
5395   return 0;
5396 }
5397
5398 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5399    Check if a text is encoded in a charset-based coding system.  If it
5400    is, return 1, else return 0.  */
5401
5402 static int
5403 detect_coding_charset (struct coding_system *coding,
5404                        struct coding_detection_info *detect_info)
5405 {
5406   const unsigned char *src = coding->source, *src_base;
5407   const unsigned char *src_end = coding->source + coding->src_bytes;
5408   int multibytep = coding->src_multibyte;
5409   EMACS_INT consumed_chars = 0;
5410   Lisp_Object attrs, valids, name;
5411   int found = 0;
5412   EMACS_INT head_ascii = coding->head_ascii;
5413   int check_latin_extra = 0;
5414
5415   detect_info->checked |= CATEGORY_MASK_CHARSET;
5416
5417   coding = &coding_categories[coding_category_charset];
5418   attrs = CODING_ID_ATTRS (coding->id);
5419   valids = AREF (attrs, coding_attr_charset_valids);
5420   name = CODING_ID_NAME (coding->id);
5421   if (strncmp (SSDATA (SYMBOL_NAME (name)),
5422                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5423       || strncmp (SSDATA (SYMBOL_NAME (name)),
5424                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5425     check_latin_extra = 1;
5426
5427   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5428     src += head_ascii;
5429
5430   while (1)
5431     {
5432       int c;
5433       Lisp_Object val;
5434       struct charset *charset;
5435       int dim, idx;
5436
5437       src_base = src;
5438       ONE_MORE_BYTE (c);
5439       if (c < 0)
5440         continue;
5441       val = AREF (valids, c);
5442       if (NILP (val))
5443         break;
5444       if (c >= 0x80)
5445         {
5446           if (c < 0xA0
5447               && check_latin_extra
5448               && (!VECTORP (Vlatin_extra_code_table)
5449                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5450             break;
5451           found = CATEGORY_MASK_CHARSET;
5452         }
5453       if (INTEGERP (val))
5454         {
5455           charset = CHARSET_FROM_ID (XFASTINT (val));
5456           dim = CHARSET_DIMENSION (charset);
5457           for (idx = 1; idx < dim; idx++)
5458             {
5459               if (src == src_end)
5460                 goto too_short;
5461               ONE_MORE_BYTE (c);
5462               if (c < charset->code_space[(dim - 1 - idx) * 4]
5463                   || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5464                 break;
5465             }
5466           if (idx < dim)
5467             break;
5468         }
5469       else
5470         {
5471           idx = 1;
5472           for (; CONSP (val); val = XCDR (val))
5473             {
5474               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5475               dim = CHARSET_DIMENSION (charset);
5476               while (idx < dim)
5477                 {
5478                   if (src == src_end)
5479                     goto too_short;
5480                   ONE_MORE_BYTE (c);
5481                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5482                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5483                     break;
5484                   idx++;
5485                 }
5486               if (idx == dim)
5487                 {
5488                   val = Qnil;
5489                   break;
5490                 }
5491             }
5492           if (CONSP (val))
5493             break;
5494         }
5495     }
5496  too_short:
5497   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5498   return 0;
5499
5500  no_more_source:
5501   detect_info->found |= found;
5502   return 1;
5503 }
5504
5505 static void
5506 decode_coding_charset (struct coding_system *coding)
5507 {
5508   const unsigned char *src = coding->source + coding->consumed;
5509   const unsigned char *src_end = coding->source + coding->src_bytes;
5510   const unsigned char *src_base;
5511   int *charbuf = coding->charbuf + coding->charbuf_used;
5512   /* We may produce one charset annotation in one loop and one more at
5513      the end.  */
5514   int *charbuf_end
5515     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5516   EMACS_INT consumed_chars = 0, consumed_chars_base;
5517   int multibytep = coding->src_multibyte;
5518   Lisp_Object attrs = CODING_ID_ATTRS (coding->id);
5519   Lisp_Object valids;
5520   EMACS_INT char_offset = coding->produced_char;
5521   EMACS_INT last_offset = char_offset;
5522   int last_id = charset_ascii;
5523   int eol_dos =
5524     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5525   int byte_after_cr = -1;
5526
5527   valids = AREF (attrs, coding_attr_charset_valids);
5528
5529   while (1)
5530     {
5531       int c;
5532       Lisp_Object val;
5533       struct charset *charset;
5534       int dim;
5535       int len = 1;
5536       unsigned code;
5537
5538       src_base = src;
5539       consumed_chars_base = consumed_chars;
5540
5541       if (charbuf >= charbuf_end)
5542         {
5543           if (byte_after_cr >= 0)
5544             src_base--;
5545           break;
5546         }
5547
5548       if (byte_after_cr >= 0)
5549         {
5550           c = byte_after_cr;
5551           byte_after_cr = -1;
5552         }
5553       else
5554         {
5555           ONE_MORE_BYTE (c);
5556           if (eol_dos && c == '\r')
5557             ONE_MORE_BYTE (byte_after_cr);
5558         }
5559       if (c < 0)
5560         goto invalid_code;
5561       code = c;
5562
5563       val = AREF (valids, c);
5564       if (! INTEGERP (val) && ! CONSP (val))
5565         goto invalid_code;
5566       if (INTEGERP (val))
5567         {
5568           charset = CHARSET_FROM_ID (XFASTINT (val));
5569           dim = CHARSET_DIMENSION (charset);
5570           while (len < dim)
5571             {
5572               ONE_MORE_BYTE (c);
5573               code = (code << 8) | c;
5574               len++;
5575             }
5576           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5577                               charset, code, c);
5578         }
5579       else
5580         {
5581           /* VAL is a list of charset IDs.  It is assured that the
5582              list is sorted by charset dimensions (smaller one
5583              comes first).  */
5584           while (CONSP (val))
5585             {
5586               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5587               dim = CHARSET_DIMENSION (charset);
5588               while (len < dim)
5589                 {
5590                   ONE_MORE_BYTE (c);
5591                   code = (code << 8) | c;
5592                   len++;
5593                 }
5594               CODING_DECODE_CHAR (coding, src, src_base,
5595                                   src_end, charset, code, c);
5596               if (c >= 0)
5597                 break;
5598               val = XCDR (val);
5599             }
5600         }
5601       if (c < 0)
5602         goto invalid_code;
5603       if (charset->id != charset_ascii
5604           && last_id != charset->id)
5605         {
5606           if (last_id != charset_ascii)
5607             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5608           last_id = charset->id;
5609           last_offset = char_offset;
5610         }
5611
5612       *charbuf++ = c;
5613       char_offset++;
5614       continue;
5615
5616     invalid_code:
5617       src = src_base;
5618       consumed_chars = consumed_chars_base;
5619       ONE_MORE_BYTE (c);
5620       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5621       char_offset++;
5622       coding->errors++;
5623     }
5624
5625  no_more_source:
5626   if (last_id != charset_ascii)
5627     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5628   coding->consumed_char += consumed_chars_base;
5629   coding->consumed = src_base - coding->source;
5630   coding->charbuf_used = charbuf - coding->charbuf;
5631 }
5632
5633 static int
5634 encode_coding_charset (struct coding_system *coding)
5635 {
5636   int multibytep = coding->dst_multibyte;
5637   int *charbuf = coding->charbuf;
5638   int *charbuf_end = charbuf + coding->charbuf_used;
5639   unsigned char *dst = coding->destination + coding->produced;
5640   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5641   int safe_room = MAX_MULTIBYTE_LENGTH;
5642   EMACS_INT produced_chars = 0;
5643   Lisp_Object attrs, charset_list;
5644   int ascii_compatible;
5645   int c;
5646
5647   CODING_GET_INFO (coding, attrs, charset_list);
5648   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5649
5650   while (charbuf < charbuf_end)
5651     {
5652       struct charset *charset;
5653       unsigned code;
5654
5655       ASSURE_DESTINATION (safe_room);
5656       c = *charbuf++;
5657       if (ascii_compatible && ASCII_CHAR_P (c))
5658         EMIT_ONE_ASCII_BYTE (c);
5659       else if (CHAR_BYTE8_P (c))
5660         {
5661           c = CHAR_TO_BYTE8 (c);
5662           EMIT_ONE_BYTE (c);
5663         }
5664       else
5665         {
5666           CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list,
5667                                &code, charset);
5668
5669           if (charset)
5670             {
5671               if (CHARSET_DIMENSION (charset) == 1)
5672                 EMIT_ONE_BYTE (code);
5673               else if (CHARSET_DIMENSION (charset) == 2)
5674                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5675               else if (CHARSET_DIMENSION (charset) == 3)
5676                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5677               else
5678                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5679                                  (code >> 8) & 0xFF, code & 0xFF);
5680             }
5681           else
5682             {
5683               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5684                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5685               else
5686                 c = coding->default_char;
5687               EMIT_ONE_BYTE (c);
5688             }
5689         }
5690     }
5691
5692   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5693   coding->produced_char += produced_chars;
5694   coding->produced = dst - coding->destination;
5695   return 0;
5696 }
5697
5698 \f
/*** 10. C library functions ***/
5700
5701 /* Setup coding context CODING from information about CODING_SYSTEM.
5702    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5703    CODING_SYSTEM is invalid, signal an error.  */
5704
5705 void
5706 setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5707 {
5708   Lisp_Object attrs;
5709   Lisp_Object eol_type;
5710   Lisp_Object coding_type;
5711   Lisp_Object val;
5712
5713   if (NILP (coding_system))
5714     coding_system = Qundecided;
5715
5716   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5717
5718   attrs = CODING_ID_ATTRS (coding->id);
5719   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5720
5721   coding->mode = 0;
5722   coding->head_ascii = -1;
5723   if (VECTORP (eol_type))
5724     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5725                             | CODING_REQUIRE_DETECTION_MASK);
5726   else if (! EQ (eol_type, Qunix))
5727     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5728                             | CODING_REQUIRE_ENCODING_MASK);
5729   else
5730     coding->common_flags = 0;
5731   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5732     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5733   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5734     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5735   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5736     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5737
5738   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5739   coding->max_charset_id = SCHARS (val) - 1;
5740   coding->safe_charsets = SDATA (val);
5741   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5742   coding->carryover_bytes = 0;
5743
5744   coding_type = CODING_ATTR_TYPE (attrs);
5745   if (EQ (coding_type, Qundecided))
5746     {
5747       coding->detector = NULL;
5748       coding->decoder = decode_coding_raw_text;
5749       coding->encoder = encode_coding_raw_text;
5750       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5751     }
5752   else if (EQ (coding_type, Qiso_2022))
5753     {
5754       int i;
5755       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5756
5757       /* Invoke graphic register 0 to plane 0.  */
5758       CODING_ISO_INVOCATION (coding, 0) = 0;
5759       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5760       CODING_ISO_INVOCATION (coding, 1)
5761         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5762       /* Setup the initial status of designation.  */
5763       for (i = 0; i < 4; i++)
5764         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5765       /* Not single shifting initially.  */
5766       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5767       /* Beginning of buffer should also be regarded as bol. */
5768       CODING_ISO_BOL (coding) = 1;
5769       coding->detector = detect_coding_iso_2022;
5770       coding->decoder = decode_coding_iso_2022;
5771       coding->encoder = encode_coding_iso_2022;
5772       if (flags & CODING_ISO_FLAG_SAFE)
5773         coding->mode |= CODING_MODE_SAFE_ENCODING;
5774       coding->common_flags
5775         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5776             | CODING_REQUIRE_FLUSHING_MASK);
5777       if (flags & CODING_ISO_FLAG_COMPOSITION)
5778         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5779       if (flags & CODING_ISO_FLAG_DESIGNATION)
5780         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5781       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5782         {
5783           setup_iso_safe_charsets (attrs);
5784           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5785           coding->max_charset_id = SCHARS (val) - 1;
5786           coding->safe_charsets = SDATA (val);
5787         }
5788       CODING_ISO_FLAGS (coding) = flags;
5789       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5790       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5791       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5792       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5793     }
5794   else if (EQ (coding_type, Qcharset))
5795     {
5796       coding->detector = detect_coding_charset;
5797       coding->decoder = decode_coding_charset;
5798       coding->encoder = encode_coding_charset;
5799       coding->common_flags
5800         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5801     }
5802   else if (EQ (coding_type, Qutf_8))
5803     {
5804       val = AREF (attrs, coding_attr_utf_bom);
5805       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5806                                    : EQ (val, Qt) ? utf_with_bom
5807                                    : utf_without_bom);
5808       coding->detector = detect_coding_utf_8;
5809       coding->decoder = decode_coding_utf_8;
5810       coding->encoder = encode_coding_utf_8;
5811       coding->common_flags
5812         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5813       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5814         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5815     }
5816   else if (EQ (coding_type, Qutf_16))
5817     {
5818       val = AREF (attrs, coding_attr_utf_bom);
5819       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5820                                     : EQ (val, Qt) ? utf_with_bom
5821                                     : utf_without_bom);
5822       val = AREF (attrs, coding_attr_utf_16_endian);
5823       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5824                                        : utf_16_little_endian);
5825       CODING_UTF_16_SURROGATE (coding) = 0;
5826       coding->detector = detect_coding_utf_16;
5827       coding->decoder = decode_coding_utf_16;
5828       coding->encoder = encode_coding_utf_16;
5829       coding->common_flags
5830         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5831       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5832         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5833     }
5834   else if (EQ (coding_type, Qccl))
5835     {
5836       coding->detector = detect_coding_ccl;
5837       coding->decoder = decode_coding_ccl;
5838       coding->encoder = encode_coding_ccl;
5839       coding->common_flags
5840         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5841             | CODING_REQUIRE_FLUSHING_MASK);
5842     }
5843   else if (EQ (coding_type, Qemacs_mule))
5844     {
5845       coding->detector = detect_coding_emacs_mule;
5846       coding->decoder = decode_coding_emacs_mule;
5847       coding->encoder = encode_coding_emacs_mule;
5848       coding->common_flags
5849         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5850       coding->spec.emacs_mule.full_support = 1;
5851       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5852           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5853         {
5854           Lisp_Object tail, safe_charsets;
5855           int max_charset_id = 0;
5856
5857           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5858                tail = XCDR (tail))
5859             if (max_charset_id < XFASTINT (XCAR (tail)))
5860               max_charset_id = XFASTINT (XCAR (tail));
5861           safe_charsets = make_uninit_string (max_charset_id + 1);
5862           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5863           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5864                tail = XCDR (tail))
5865             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5866           coding->max_charset_id = max_charset_id;
5867           coding->safe_charsets = SDATA (safe_charsets);
5868           coding->spec.emacs_mule.full_support = 1;
5869         }
5870       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5871       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5872     }
5873   else if (EQ (coding_type, Qshift_jis))
5874     {
5875       coding->detector = detect_coding_sjis;
5876       coding->decoder = decode_coding_sjis;
5877       coding->encoder = encode_coding_sjis;
5878       coding->common_flags
5879         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5880     }
5881   else if (EQ (coding_type, Qbig5))
5882     {
5883       coding->detector = detect_coding_big5;
5884       coding->decoder = decode_coding_big5;
5885       coding->encoder = encode_coding_big5;
5886       coding->common_flags
5887         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5888     }
5889   else                          /* EQ (coding_type, Qraw_text) */
5890     {
5891       coding->detector = NULL;
5892       coding->decoder = decode_coding_raw_text;
5893       coding->encoder = encode_coding_raw_text;
5894       if (! EQ (eol_type, Qunix))
5895         {
5896           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5897           if (! VECTORP (eol_type))
5898             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5899         }
5900
5901     }
5902
5903   return;
5904 }
5905
5906 /* Return a list of charsets supported by CODING.  */
5907
5908 Lisp_Object
5909 coding_charset_list (struct coding_system *coding)
5910 {
5911   Lisp_Object attrs, charset_list;
5912
5913   CODING_GET_INFO (coding, attrs, charset_list);
5914   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5915     {
5916       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5917
5918       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5919         charset_list = Viso_2022_charset_list;
5920     }
5921   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5922     {
5923       charset_list = Vemacs_mule_charset_list;
5924     }
5925   return charset_list;
5926 }
5927
5928
5929 /* Return a list of charsets supported by CODING-SYSTEM.  */
5930
5931 Lisp_Object
5932 coding_system_charset_list (Lisp_Object coding_system)
5933 {
5934   ptrdiff_t id;
5935   Lisp_Object attrs, charset_list;
5936
5937   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5938   attrs = CODING_ID_ATTRS (id);
5939
5940   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5941     {
5942       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5943
5944       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5945         charset_list = Viso_2022_charset_list;
5946       else
5947         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5948     }
5949   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5950     {
5951       charset_list = Vemacs_mule_charset_list;
5952     }
5953   else
5954     {
5955       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5956     }
5957   return charset_list;
5958 }
5959
5960
5961 /* Return raw-text or one of its subsidiaries that has the same
5962    eol_type as CODING-SYSTEM.  */
5963
5964 Lisp_Object
5965 raw_text_coding_system (Lisp_Object coding_system)
5966 {
5967   Lisp_Object spec, attrs;
5968   Lisp_Object eol_type, raw_text_eol_type;
5969
5970   if (NILP (coding_system))
5971     return Qraw_text;
5972   spec = CODING_SYSTEM_SPEC (coding_system);
5973   attrs = AREF (spec, 0);
5974
5975   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5976     return coding_system;
5977
5978   eol_type = AREF (spec, 2);
5979   if (VECTORP (eol_type))
5980     return Qraw_text;
5981   spec = CODING_SYSTEM_SPEC (Qraw_text);
5982   raw_text_eol_type = AREF (spec, 2);
5983   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5984           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5985           : AREF (raw_text_eol_type, 2));
5986 }
5987
5988
5989 /* If CODING_SYSTEM doesn't specify end-of-line format, return one of
5990    the subsidiary that has the same eol-spec as PARENT (if it is not
5991    nil and specifies end-of-line format) or the system's setting
5992    (system_eol_type).  */
5993
5994 Lisp_Object
5995 coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
5996 {
5997   Lisp_Object spec, eol_type;
5998
5999   if (NILP (coding_system))
6000     coding_system = Qraw_text;
6001   spec = CODING_SYSTEM_SPEC (coding_system);
6002   eol_type = AREF (spec, 2);
6003   if (VECTORP (eol_type))
6004     {
6005       Lisp_Object parent_eol_type;
6006
6007       if (! NILP (parent))
6008         {
6009           Lisp_Object parent_spec;
6010
6011           parent_spec = CODING_SYSTEM_SPEC (parent);
6012           parent_eol_type = AREF (parent_spec, 2);
6013           if (VECTORP (parent_eol_type))
6014             parent_eol_type = system_eol_type;
6015         }
6016       else
6017         parent_eol_type = system_eol_type;
6018       if (EQ (parent_eol_type, Qunix))
6019         coding_system = AREF (eol_type, 0);
6020       else if (EQ (parent_eol_type, Qdos))
6021         coding_system = AREF (eol_type, 1);
6022       else if (EQ (parent_eol_type, Qmac))
6023         coding_system = AREF (eol_type, 2);
6024     }
6025   return coding_system;
6026 }
6027
6028
6029 /* Check if text-conversion and eol-conversion of CODING_SYSTEM are
6030    decided for writing to a process.  If not, complement them, and
6031    return a new coding system.  */
6032
6033 Lisp_Object
6034 complement_process_encoding_system (Lisp_Object coding_system)
6035 {
6036   Lisp_Object coding_base = Qnil, eol_base = Qnil;
6037   Lisp_Object spec, attrs;
6038   int i;
6039
6040   for (i = 0; i < 3; i++)
6041     {
6042       if (i == 1)
6043         coding_system = CDR_SAFE (Vdefault_process_coding_system);
6044       else if (i == 2)
6045         coding_system = preferred_coding_system ();
6046       spec = CODING_SYSTEM_SPEC (coding_system);
6047       if (NILP (spec))
6048         continue;
6049       attrs = AREF (spec, 0);
6050       if (NILP (coding_base) && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6051         coding_base = CODING_ATTR_BASE_NAME (attrs);
6052       if (NILP (eol_base) && ! VECTORP (AREF (spec, 2)))
6053         eol_base = coding_system;
6054       if (! NILP (coding_base) && ! NILP (eol_base))
6055         break;
6056     }
6057
6058   if (i > 0)
6059     /* The original CODING_SYSTEM didn't specify text-conversion or
6060        eol-conversion.  Be sure that we return a fully complemented
6061        coding system.  */
6062     coding_system = coding_inherit_eol_type (coding_base, eol_base);
6063   return coding_system;
6064 }
6065
6066
/* Emacs has a mechanism to automatically detect a coding system if it
   is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But
   it's impossible to distinguish some coding systems accurately
   because they use the same range of codes.  So, at first, coding
   systems are grouped into the categories listed below:
6072
6073    o coding-category-emacs-mule
6074
6075         The category for a coding system which has the same code range
6076         as Emacs' internal format.  Assigned the coding-system (Lisp
6077         symbol) `emacs-mule' by default.
6078
6079    o coding-category-sjis
6080
6081         The category for a coding system which has the same code range
6082         as SJIS.  Assigned the coding-system (Lisp
6083         symbol) `japanese-shift-jis' by default.
6084
6085    o coding-category-iso-7
6086
6087         The category for a coding system which has the same code range
6088         as ISO2022 of 7-bit environment.  This doesn't use any locking
6089         shift and single shift functions.  This can encode/decode all
6090         charsets.  Assigned the coding-system (Lisp symbol)
6091         `iso-2022-7bit' by default.
6092
6093    o coding-category-iso-7-tight
6094
6095         Same as coding-category-iso-7 except that this can
6096         encode/decode only the specified charsets.
6097
6098    o coding-category-iso-8-1
6099
6100         The category for a coding system which has the same code range
6101         as ISO2022 of 8-bit environment and graphic plane 1 used only
6102         for DIMENSION1 charset.  This doesn't use any locking shift
6103         and single shift functions.  Assigned the coding-system (Lisp
6104         symbol) `iso-latin-1' by default.
6105
6106    o coding-category-iso-8-2
6107
6108         The category for a coding system which has the same code range
6109         as ISO2022 of 8-bit environment and graphic plane 1 used only
6110         for DIMENSION2 charset.  This doesn't use any locking shift
6111         and single shift functions.  Assigned the coding-system (Lisp
6112         symbol) `japanese-iso-8bit' by default.
6113
6114    o coding-category-iso-7-else
6115
6116         The category for a coding system which has the same code range
6117         as ISO2022 of 7-bit environment but uses locking shift or
6118         single shift functions.  Assigned the coding-system (Lisp
6119         symbol) `iso-2022-7bit-lock' by default.
6120
6121    o coding-category-iso-8-else
6122
6123         The category for a coding system which has the same code range
6124         as ISO2022 of 8-bit environment but uses locking shift or
6125         single shift functions.  Assigned the coding-system (Lisp
6126         symbol) `iso-2022-8bit-ss2' by default.
6127
6128    o coding-category-big5
6129
6130         The category for a coding system which has the same code range
6131         as BIG5.  Assigned the coding-system (Lisp symbol)
6132         `cn-big5' by default.
6133
6134    o coding-category-utf-8
6135
6136         The category for a coding system which has the same code range
6137         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6138         symbol) `utf-8' by default.
6139
6140    o coding-category-utf-16-be
6141
        The category for a coding system in which a text has a
        Unicode signature (cf. Unicode Standard) in big-endian byte
        order at the head.  Assigned the coding-system (Lisp symbol)
        `utf-16-be' by default.
6146
6147    o coding-category-utf-16-le
6148
        The category for a coding system in which a text has a
        Unicode signature (cf. Unicode Standard) in little-endian
        byte order at the head.  Assigned the coding-system (Lisp
        symbol) `utf-16-le' by default.
6153
6154    o coding-category-ccl
6155
6156         The category for a coding system of which encoder/decoder is
6157         written in CCL programs.  The default value is nil, i.e., no
6158         coding system is assigned.
6159
6160    o coding-category-binary
6161
6162         The category for a coding system not categorized in any of the
6163         above.  Assigned the coding-system (Lisp symbol)
6164         `no-conversion' by default.
6165
   Each of them is a Lisp symbol whose value is the actual
   `coding-system' (also a Lisp symbol) assigned to it by a user.
   What Emacs actually does is detect the category of a coding system
   and then use the `coding-system' assigned to that category.  If
   Emacs can't narrow the choice down to a single category, it selects
   the category of the highest priority.  The priorities of categories
   are also specified by a user, in the Lisp variable
   `coding-category-list'.
6173
6174 */
6175
6176 #define EOL_SEEN_NONE   0
6177 #define EOL_SEEN_LF     1
6178 #define EOL_SEEN_CR     2
6179 #define EOL_SEEN_CRLF   4
6180
6181 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6182    SOURCE is encoded.  If CATEGORY is one of
6183    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6184    two-byte, else they are encoded by one-byte.
6185
6186    Return one of EOL_SEEN_XXX.  */
6187
6188 #define MAX_EOL_CHECK_COUNT 3
6189
6190 static int
6191 detect_eol (const unsigned char *source, EMACS_INT src_bytes,
6192             enum coding_category category)
6193 {
6194   const unsigned char *src = source, *src_end = src + src_bytes;
6195   unsigned char c;
6196   int total  = 0;
6197   int eol_seen = EOL_SEEN_NONE;
6198
6199   if ((1 << category) & CATEGORY_MASK_UTF_16)
6200     {
6201       int msb, lsb;
6202
6203       msb = category == (coding_category_utf_16_le
6204                          | coding_category_utf_16_le_nosig);
6205       lsb = 1 - msb;
6206
6207       while (src + 1 < src_end)
6208         {
6209           c = src[lsb];
6210           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6211             {
6212               int this_eol;
6213
6214               if (c == '\n')
6215                 this_eol = EOL_SEEN_LF;
6216               else if (src + 3 >= src_end
6217                        || src[msb + 2] != 0
6218                        || src[lsb + 2] != '\n')
6219                 this_eol = EOL_SEEN_CR;
6220               else
6221                 {
6222                   this_eol = EOL_SEEN_CRLF;
6223                   src += 2;
6224                 }
6225
6226               if (eol_seen == EOL_SEEN_NONE)
6227                 /* This is the first end-of-line.  */
6228                 eol_seen = this_eol;
6229               else if (eol_seen != this_eol)
6230                 {
6231                   /* The found type is different from what found before.
6232                      Allow for stray ^M characters in DOS EOL files.  */
6233                   if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6234                       || (eol_seen == EOL_SEEN_CRLF
6235                           && this_eol == EOL_SEEN_CR))
6236                     eol_seen = EOL_SEEN_CRLF;
6237                   else
6238                     {
6239                       eol_seen = EOL_SEEN_LF;
6240                       break;
6241                     }
6242                 }
6243               if (++total == MAX_EOL_CHECK_COUNT)
6244                 break;
6245             }
6246           src += 2;
6247         }
6248     }
6249   else
6250     while (src < src_end)
6251       {
6252         c = *src++;
6253         if (c == '\n' || c == '\r')
6254           {
6255             int this_eol;
6256
6257             if (c == '\n')
6258               this_eol = EOL_SEEN_LF;
6259             else if (src >= src_end || *src != '\n')
6260               this_eol = EOL_SEEN_CR;
6261             else
6262               this_eol = EOL_SEEN_CRLF, src++;
6263
6264             if (eol_seen == EOL_SEEN_NONE)
6265               /* This is the first end-of-line.  */
6266               eol_seen = this_eol;
6267             else if (eol_seen != this_eol)
6268               {
6269                 /* The found type is different from what found before.
6270                    Allow for stray ^M characters in DOS EOL files.  */
6271                 if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
6272                     || (eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR))
6273                   eol_seen = EOL_SEEN_CRLF;
6274                 else
6275                   {
6276                     eol_seen = EOL_SEEN_LF;
6277                     break;
6278                   }
6279               }
6280             if (++total == MAX_EOL_CHECK_COUNT)
6281               break;
6282           }
6283       }
6284   return eol_seen;
6285 }
6286
6287
6288 static Lisp_Object
6289 adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6290 {
6291   Lisp_Object eol_type;
6292
6293   eol_type = CODING_ID_EOL_TYPE (coding->id);
6294   if (eol_seen & EOL_SEEN_LF)
6295     {
6296       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6297       eol_type = Qunix;
6298     }
6299   else if (eol_seen & EOL_SEEN_CRLF)
6300     {
6301       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6302       eol_type = Qdos;
6303     }
6304   else if (eol_seen & EOL_SEEN_CR)
6305     {
6306       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6307       eol_type = Qmac;
6308     }
6309   return eol_type;
6310 }
6311
/* Detect how a text specified in CODING is encoded.  If a coding
   system is detected, update fields of CODING by the detected coding
   system.

   Detection is done only in these three cases:
   - the type of CODING's coding system is `undecided': the source
     bytes are scanned and the detectors of the coding categories are
     tried in priority order;
   - its category is coding_category_utf_8_auto: the utf-bom attribute
     supplies the coding system to use with and without a signature;
   - its category is coding_category_utf_16_auto: the utf-bom
     attribute supplies the little-endian and big-endian coding
     systems.

   CODING->consumed, consumed_char, produced and produced_char are
   reset to zero, CODING->head_ascii is recomputed, and CODING->mode
   is the same on return as it was on entry.  */

static void
detect_coding (struct coding_system *coding)
{
  const unsigned char *src, *src_end;
  /* Put back by the last statement of this function.  */
  int saved_mode = coding->mode;

  coding->consumed = coding->consumed_char = 0;
  coding->produced = coding->produced_char = 0;
  coding_set_source (coding);

  src_end = coding->source + coding->src_bytes;
  coding->head_ascii = 0;

  /* If we have not yet decided the text encoding type, detect it
     now.  */
  if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
    {
      int c, i;
      struct coding_detection_info detect_info;
      int null_byte_found = 0, eight_bit_found = 0;

      detect_info.checked = detect_info.found = detect_info.rejected = 0;
      /* Pre-scan: count in head_ascii the 7-bit bytes that precede
         the first 8-bit byte, and note whether a null byte and/or an
         8-bit byte occurs.  The scan stops as soon as both kinds have
         been seen.  */
      for (src = coding->source; src < src_end; src++)
        {
          c = *src;
          if (c & 0x80)
            {
              eight_bit_found = 1;
              if (null_byte_found)
                break;
            }
          else if (c < 0x20)
            {
              /* Run the ISO-2022 detector at the first ESC, SI or SO,
                 unless some detector has already been run.  */
              if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
                  && ! inhibit_iso_escape_detection
                  && ! detect_info.checked)
                {
                  if (detect_coding_iso_2022 (coding, &detect_info))
                    {
                      /* We have scanned the whole data.  */
                      if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
                        {
                          /* We didn't find an 8-bit code.  We may
                             have found a null-byte, but it's very
                             rare that a binary file conforms to
                             ISO-2022.  */
                          src = src_end;
                          coding->head_ascii = src - coding->source;
                        }
                      /* Reject every category other than those in
                         CATEGORY_MASK_ISO_ESCAPE.  */
                      detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
                      break;
                    }
                }
              else if (! c && !inhibit_null_byte_detection)
                {
                  null_byte_found = 1;
                  if (eight_bit_found)
                    break;
                }
              if (! eight_bit_found)
                coding->head_ascii++;
            }
          else if (! eight_bit_found)
            coding->head_ascii++;
        }

      /* Something other than plain text without null bytes was
         found, so a coding system has to be chosen.  */
      if (null_byte_found || eight_bit_found
          || coding->head_ascii < coding->src_bytes
          || detect_info.found)
        {
          enum coding_category category;
          struct coding_system *this;

          if (coding->head_ascii == coding->src_bytes)
            /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
            for (i = 0; i < coding_category_raw_text; i++)
              {
                category = coding_priorities[i];
                this = coding_categories + category;
                if (detect_info.found & (1 << category))
                  break;
              }
          else
            {
              if (null_byte_found)
                {
                  /* With a null byte present, only the UTF-16
                     categories remain candidates.  */
                  detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                  detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                }
              /* Try the categories in priority order; leave the loop
                 at the first one that is found.  */
              for (i = 0; i < coding_category_raw_text; i++)
                {
                  category = coding_priorities[i];
                  this = coding_categories + category;
                  if (this->id < 0)
                    {
                      /* No coding system of this category is defined.  */
                      detect_info.rejected |= (1 << category);
                    }
                  else if (category >= coding_category_raw_text)
                    continue;
                  else if (detect_info.checked & (1 << category))
                    {
                      /* Already examined by an earlier detector run;
                         just look at the recorded result.  */
                      if (detect_info.found & (1 << category))
                        break;
                    }
                  else if ((*(this->detector)) (coding, &detect_info)
                           && detect_info.found & (1 << category))
                    {
                      /* NOTE(review): CATEGORY is narrowed here, but
                         THIS still points at the utf-16-auto entry
                         and only THIS is used after the loop --
                         confirm whether the narrowing is meant to
                         have an effect.  */
                      if (category == coding_category_utf_16_auto)
                        {
                          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
                            category = coding_category_utf_16_le;
                          else
                            category = coding_category_utf_16_be;
                        }
                      break;
                    }
                }
            }

          /* I is below coding_category_raw_text exactly when one of
             the loops above ended with `break', i.e. a category was
             found.  Otherwise fall back, in this order, to
             no-conversion, to raw-text, or to the highest-priority
             category that has not been rejected.  */
          if (i < coding_category_raw_text)
            setup_coding_system (CODING_ID_NAME (this->id), coding);
          else if (null_byte_found)
            setup_coding_system (Qno_conversion, coding);
          else if ((detect_info.rejected & CATEGORY_MASK_ANY)
                   == CATEGORY_MASK_ANY)
            setup_coding_system (Qraw_text, coding);
          else if (detect_info.rejected)
            for (i = 0; i < coding_category_raw_text; i++)
              if (! (detect_info.rejected & (1 << coding_priorities[i])))
                {
                  this = coding_categories + coding_priorities[i];
                  setup_coding_system (CODING_ID_NAME (this->id), coding);
                  break;
                }
        }
    }
  else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
           == coding_category_utf_8_auto)
    {
      Lisp_Object coding_systems;
      struct coding_detection_info detect_info;

      /* The utf-bom attribute is used here as a cons: its car is
         selected when a UTF-8 signature is found, its cdr
         otherwise.  */
      coding_systems
        = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
      /* NOTE(review): detect_info.checked is not initialized on this
         path -- confirm that the detector does not depend on it.  */
      detect_info.found = detect_info.rejected = 0;
      coding->head_ascii = 0;
      if (CONSP (coding_systems)
          && detect_coding_utf_8 (coding, &detect_info))
        {
          if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
            setup_coding_system (XCAR (coding_systems), coding);
          else
            setup_coding_system (XCDR (coding_systems), coding);
        }
    }
  else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
           == coding_category_utf_16_auto)
    {
      Lisp_Object coding_systems;
      struct coding_detection_info detect_info;

      /* The utf-bom attribute is used here as a cons: its car is
         selected for little-endian text, its cdr for big-endian
         text.  If neither is found, CODING is left as it is.  */
      coding_systems
        = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
      /* NOTE(review): detect_info.checked is not initialized on this
         path -- confirm that the detector does not depend on it.  */
      detect_info.found = detect_info.rejected = 0;
      coding->head_ascii = 0;
      if (CONSP (coding_systems)
          && detect_coding_utf_16 (coding, &detect_info))
        {
          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
            setup_coding_system (XCAR (coding_systems), coding);
          else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
            setup_coding_system (XCDR (coding_systems), coding);
        }
    }
  coding->mode = saved_mode;
}
6493
6494
6495 static void
6496 decode_eol (struct coding_system *coding)
6497 {
6498   Lisp_Object eol_type;
6499   unsigned char *p, *pbeg, *pend;
6500
6501   eol_type = CODING_ID_EOL_TYPE (coding->id);
6502   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6503     return;
6504
6505   if (NILP (coding->dst_object))
6506     pbeg = coding->destination;
6507   else
6508     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6509   pend = pbeg + coding->produced;
6510
6511   if (VECTORP (eol_type))
6512     {
6513       int eol_seen = EOL_SEEN_NONE;
6514
6515       for (p = pbeg; p < pend; p++)
6516         {
6517           if (*p == '\n')
6518             eol_seen |= EOL_SEEN_LF;
6519           else if (*p == '\r')
6520             {
6521               if (p + 1 < pend && *(p + 1) == '\n')
6522                 {
6523                   eol_seen |= EOL_SEEN_CRLF;
6524                   p++;
6525                 }
6526               else
6527                 eol_seen |= EOL_SEEN_CR;
6528             }
6529         }
6530       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6531       if ((eol_seen & EOL_SEEN_CRLF) != 0
6532           && (eol_seen & EOL_SEEN_CR) != 0
6533           && (eol_seen & EOL_SEEN_LF) == 0)
6534         eol_seen = EOL_SEEN_CRLF;
6535       else if (eol_seen != EOL_SEEN_NONE
6536           && eol_seen != EOL_SEEN_LF
6537           && eol_seen != EOL_SEEN_CRLF
6538           && eol_seen != EOL_SEEN_CR)
6539         eol_seen = EOL_SEEN_LF;
6540       if (eol_seen != EOL_SEEN_NONE)
6541         eol_type = adjust_coding_eol_type (coding, eol_seen);
6542     }
6543
6544   if (EQ (eol_type, Qmac))
6545     {
6546       for (p = pbeg; p < pend; p++)
6547         if (*p == '\r')
6548           *p = '\n';
6549     }
6550   else if (EQ (eol_type, Qdos))
6551     {
6552       EMACS_INT n = 0;
6553
6554       if (NILP (coding->dst_object))
6555         {
6556           /* Start deleting '\r' from the tail to minimize the memory
6557              movement.  */
6558           for (p = pend - 2; p >= pbeg; p--)
6559             if (*p == '\r')
6560               {
6561                 memmove (p, p + 1, pend-- - p - 1);
6562                 n++;
6563               }
6564         }
6565       else
6566         {
6567           EMACS_INT pos_byte = coding->dst_pos_byte;
6568           EMACS_INT pos = coding->dst_pos;
6569           EMACS_INT pos_end = pos + coding->produced_char - 1;
6570
6571           while (pos < pos_end)
6572             {
6573               p = BYTE_POS_ADDR (pos_byte);
6574               if (*p == '\r' && p[1] == '\n')
6575                 {
6576                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6577                   n++;
6578                   pos_end--;
6579                 }
6580               pos++;
6581               if (coding->dst_multibyte)
6582                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6583               else
6584                 pos_byte++;
6585             }
6586         }
6587       coding->produced -= n;
6588       coding->produced_char -= n;
6589     }
6590 }
6591
6592
6593 /* Return a translation table (or list of them) from coding system
6594    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6595    decoding (ENCODEP is zero). */
6596
6597 static Lisp_Object
6598 get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6599 {
6600   Lisp_Object standard, translation_table;
6601   Lisp_Object val;
6602
6603   if (NILP (Venable_character_translation))
6604     {
6605       if (max_lookup)
6606         *max_lookup = 0;
6607       return Qnil;
6608     }
6609   if (encodep)
6610     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6611       standard = Vstandard_translation_table_for_encode;
6612   else
6613     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6614       standard = Vstandard_translation_table_for_decode;
6615   if (NILP (translation_table))
6616     translation_table = standard;
6617   else
6618     {
6619       if (SYMBOLP (translation_table))
6620         translation_table = Fget (translation_table, Qtranslation_table);
6621       else if (CONSP (translation_table))
6622         {
6623           translation_table = Fcopy_sequence (translation_table);
6624           for (val = translation_table; CONSP (val); val = XCDR (val))
6625             if (SYMBOLP (XCAR (val)))
6626               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6627         }
6628       if (CHAR_TABLE_P (standard))
6629         {
6630           if (CONSP (translation_table))
6631             translation_table = nconc2 (translation_table,
6632                                         Fcons (standard, Qnil));
6633           else
6634             translation_table = Fcons (translation_table,
6635                                        Fcons (standard, Qnil));
6636         }
6637     }
6638
6639   if (max_lookup)
6640     {
6641       *max_lookup = 1;
6642       if (CHAR_TABLE_P (translation_table)
6643           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6644         {
6645           val = XCHAR_TABLE (translation_table)->extras[1];
6646           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6647             *max_lookup = XFASTINT (val);
6648         }
6649       else if (CONSP (translation_table))
6650         {
6651           Lisp_Object tail;
6652
6653           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6654             if (CHAR_TABLE_P (XCAR (tail))
6655                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6656               {
6657                 Lisp_Object tailval = XCHAR_TABLE (XCAR (tail))->extras[1];
6658                 if (NATNUMP (tailval) && *max_lookup < XFASTINT (tailval))
6659                   *max_lookup = XFASTINT (tailval);
6660               }
6661         }
6662     }
6663   return translation_table;
6664 }
6665
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables, and store the outcome in TRANS.

   When the entry found for C is itself a character, C is replaced by
   that character and TRANS is left Qnil; in a list, the lookup then
   goes on in the following tables with the new C.  When the entry is
   any other non-nil value, TRANS is set to it; in a list, the lookup
   stops at the first such entry.  Elements of a list that are not
   char-tables are skipped.  TRANS is Qnil if TABLE is neither a
   char-table nor a list.

   C and TRANS are assigned to, so they must be lvalues, and TABLE and
   C are evaluated more than once.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
  do {                                                          \
    trans = Qnil;                                               \
    if (CHAR_TABLE_P (table))                                   \
      {                                                         \
        trans = CHAR_TABLE_REF (table, c);                      \
        if (CHARACTERP (trans))                                 \
          c = XFASTINT (trans), trans = Qnil;                   \
      }                                                         \
    else if (CONSP (table))                                     \
      {                                                         \
        Lisp_Object tail;                                       \
                                                                \
        for (tail = table; CONSP (tail); tail = XCDR (tail))    \
          if (CHAR_TABLE_P (XCAR (tail)))                       \
            {                                                   \
              trans = CHAR_TABLE_REF (XCAR (tail), c);          \
              if (CHARACTERP (trans))                           \
                c = XFASTINT (trans), trans = Qnil;             \
              else if (! NILP (trans))                          \
                break;                                          \
            }                                                   \
      }                                                         \
  } while (0)
6690
6691
6692 /* Return a translation of character(s) at BUF according to TRANS.
6693    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6694    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6695    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6696    translation is found, and Qnil if not found..
6697    If BUF is too short to lookup characters in FROM, return Qt.  */
6698
6699 static Lisp_Object
6700 get_translation (Lisp_Object trans, int *buf, int *buf_end)
6701 {
6702
6703   if (INTEGERP (trans))
6704     return trans;
6705   for (; CONSP (trans); trans = XCDR (trans))
6706     {
6707       Lisp_Object val = XCAR (trans);
6708       Lisp_Object from = XCAR (val);
6709       int len = ASIZE (from);
6710       int i;
6711
6712       for (i = 0; i < len; i++)
6713         {
6714           if (buf + i == buf_end)
6715             return Qt;
6716           if (XINT (AREF (from, i)) != buf[i])
6717             break;
6718         }
6719       if (i == len)
6720         return val;
6721     }
6722   return Qnil;
6723 }
6724
6725
/* Write decoded text to the destination of CODING, starting
   CODING->produced bytes after CODING->destination, and enlarging the
   destination with alloc_destination when it is too small.

   If CODING->chars_at_source is zero, the text is taken from the
   CODING->charbuf_used elements of CODING->charbuf.  A non-negative
   element is a character; it is translated through TRANSLATION_TABLE
   and then stored.  A negative element -N starts an annotation of N
   elements, which is skipped here.

   Otherwise the text is the CODING->consumed bytes at CODING->source,
   which are copied to the destination; the copy goes through
   ONE_MORE_BYTE or EMIT_ONE_BYTE when CODING->src_multibyte differs
   from CODING->dst_multibyte.

   On return CODING->produced and CODING->produced_char have been
   increased by the numbers of bytes and characters written, and when
   the destination is a buffer the new text has been handed to
   insert_from_gap.

   The value is the number of elements of CODING->charbuf that were
   left unprocessed.  The loop is left early only when a translation
   needs more characters than are available and LAST_BLOCK is
   zero.  */

static int
produce_chars (struct coding_system *coding, Lisp_Object translation_table,
               int last_block)
{
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  EMACS_INT produced;
  EMACS_INT produced_chars = 0;
  int carryover = 0;

  if (! coding->chars_at_source)
    {
      /* Source characters are in coding->charbuf.  */
      int *buf = coding->charbuf;
      int *buf_end = buf + coding->charbuf_used;

      /* When source and destination are the same object, the output
         is limited by the end of the part of the source that has
         been consumed.  */
      if (EQ (coding->src_object, coding->dst_object))
        {
          coding_set_source (coding);
          dst_end = ((unsigned char *) coding->source) + coding->consumed;
        }

      while (buf < buf_end)
        {
          int c = *buf, i;

          if (c >= 0)
            {
              /* Numbers of source characters used up, and of
                 characters to write, for this step.  */
              EMACS_INT from_nchars = 1, to_nchars = 1;
              Lisp_Object trans = Qnil;

              LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
              if (! NILP (trans))
                {
                  trans = get_translation (trans, buf, buf_end);
                  if (INTEGERP (trans))
                    c = XINT (trans);
                  else if (CONSP (trans))
                    {
                      /* TRANS is ([FROM-CHAR ...] . TO).  */
                      from_nchars = ASIZE (XCAR (trans));
                      trans = XCDR (trans);
                      if (INTEGERP (trans))
                        c = XINT (trans);
                      else
                        {
                          to_nchars = ASIZE (trans);
                          c = XINT (AREF (trans, 0));
                        }
                    }
                  /* More characters are needed to decide; leave the
                     rest for the next block, unless this is the last
                     one, in which case C is written untranslated.  */
                  else if (EQ (trans, Qt) && ! last_block)
                    break;
                }

              /* Make sure there is room for TO_NCHARS characters of
                 MAX_MULTIBYTE_LENGTH bytes each.  */
              if ((dst_end - dst) / MAX_MULTIBYTE_LENGTH < to_nchars)
                {
                  /* Refuse a request whose size could not be
                     represented.  */
                  if (((min (PTRDIFF_MAX, SIZE_MAX) - (buf_end - buf))
                       / MAX_MULTIBYTE_LENGTH)
                      < to_nchars)
                    memory_full (SIZE_MAX);
                  dst = alloc_destination (coding,
                                           buf_end - buf
                                           + MAX_MULTIBYTE_LENGTH * to_nchars,
                                           dst);
                  /* Recompute the limit from the fields of CODING
                     after the enlargement.  */
                  if (EQ (coding->src_object, coding->dst_object))
                    {
                      coding_set_source (coding);
                      dst_end = (((unsigned char *) coding->source)
                                 + coding->consumed);
                    }
                  else
                    dst_end = coding->destination + coding->dst_bytes;
                }

              for (i = 0; i < to_nchars; i++)
                {
                  /* C already holds the first character; the others
                     come from the TO vector.  */
                  if (i > 0)
                    c = XINT (AREF (trans, i));
                  if (coding->dst_multibyte
                      || ! CHAR_BYTE8_P (c))
                    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
                  else
                    *dst++ = CHAR_TO_BYTE8 (c);
                }
              produced_chars += to_nchars;
              buf += from_nchars;
            }
          else
            /* This is an annotation datum.  (-C) is the length.  */
            buf += -c;
        }
      carryover = buf_end - buf;
    }
  else
    {
      /* Source characters are at coding->source.  */
      const unsigned char *src = coding->source;
      const unsigned char *src_end = src + coding->consumed;

      if (EQ (coding->dst_object, coding->src_object))
        dst_end = (unsigned char *) src;
      if (coding->src_multibyte != coding->dst_multibyte)
        {
          if (coding->src_multibyte)
            {
              /* NOTE(review): MULTIBYTEP, CONSUMED_CHARS and the
                 label `no_more_source' are not referenced directly
                 in this function; presumably ONE_MORE_BYTE uses
                 them -- see its definition.  */
              int multibytep = 1;
              EMACS_INT consumed_chars = 0;

              while (1)
                {
                  const unsigned char *src_base = src;
                  int c;

                  ONE_MORE_BYTE (c);
                  if (dst == dst_end)
                    {
                      /* In the same-object case the limit follows
                         SRC, so it may have moved since it was last
                         set.  */
                      if (EQ (coding->src_object, coding->dst_object))
                        dst_end = (unsigned char *) src;
                      if (dst == dst_end)
                        {
                          /* Remember SRC as an offset; the pointers
                             are recomputed from CODING->source
                             below.  */
                          EMACS_INT offset = src - coding->source;

                          dst = alloc_destination (coding, src_end - src + 1,
                                                   dst);
                          dst_end = coding->destination + coding->dst_bytes;
                          coding_set_source (coding);
                          src = coding->source + offset;
                          src_end = coding->source + coding->consumed;
                          if (EQ (coding->src_object, coding->dst_object))
                            dst_end = (unsigned char *) src;
                        }
                    }
                  *dst++ = c;
                  produced_chars++;
                }
            no_more_source:
              ;
            }
          else
            while (src < src_end)
              {
                /* NOTE(review): presumably read by EMIT_ONE_BYTE --
                   see its definition.  */
                int multibytep = 1;
                int c = *src++;

                /* Keep room for two bytes per source byte.  */
                if (dst >= dst_end - 1)
                  {
                    if (EQ (coding->src_object, coding->dst_object))
                      dst_end = (unsigned char *) src;
                    if (dst >= dst_end - 1)
                      {
                        EMACS_INT offset = src - coding->source;
                        EMACS_INT more_bytes;

                        if (EQ (coding->src_object, coding->dst_object))
                          more_bytes = ((src_end - src) / 2) + 2;
                        else
                          more_bytes = src_end - src + 2;
                        dst = alloc_destination (coding, more_bytes, dst);
                        dst_end = coding->destination + coding->dst_bytes;
                        coding_set_source (coding);
                        src = coding->source + offset;
                        src_end = coding->source + coding->consumed;
                        if (EQ (coding->src_object, coding->dst_object))
                          dst_end = (unsigned char *) src;
                      }
                  }
                EMIT_ONE_BYTE (c);
              }
        }
      else
        {
          /* Same representation on both sides: a plain byte copy.  */
          if (!EQ (coding->src_object, coding->dst_object))
            {
              EMACS_INT require = coding->src_bytes - coding->dst_bytes;

              if (require > 0)
                {
                  EMACS_INT offset = src - coding->source;

                  dst = alloc_destination (coding, require, dst);
                  coding_set_source (coding);
                  src = coding->source + offset;
                  src_end = coding->source + coding->consumed;
                }
            }
          produced_chars = coding->consumed_char;
          while (src < src_end)
            *dst++ = *src++;
        }
    }

  /* Number of bytes written by this call.  */
  produced = dst - (coding->destination + coding->produced);
  if (BUFFERP (coding->dst_object) && produced_chars > 0)
    insert_from_gap (produced_chars, produced);
  coding->produced += produced;
  coding->produced_char += produced_chars;
  return carryover;
}
6923
6924 /* Compose text in CODING->object according to the annotation data at
6925    CHARBUF.  CHARBUF is an array:
6926      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6927  */
6928
6929 static inline void
6930 produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6931 {
6932   int len;
6933   EMACS_INT to;
6934   enum composition_method method;
6935   Lisp_Object components;
6936
6937   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6938   to = pos + charbuf[2];
6939   method = (enum composition_method) (charbuf[4]);
6940
6941   if (method == COMPOSITION_RELATIVE)
6942     components = Qnil;
6943   else
6944     {
6945       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6946       int i, j;
6947
6948       if (method == COMPOSITION_WITH_RULE)
6949         len = charbuf[2] * 3 - 2;
6950       charbuf += MAX_ANNOTATION_LENGTH;
6951       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6952       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6953         {
6954           if (charbuf[i] >= 0)
6955             args[j] = make_number (charbuf[i]);
6956           else
6957             {
6958               i++;
6959               args[j] = make_number (charbuf[i] % 0x100);
6960             }
6961         }
6962       components = (i == j ? Fstring (j, args) : Fvector (j, args));
6963     }
6964   compose_text (pos, to, components, Qnil, coding->dst_object);
6965 }
6966
6967
6968 /* Put `charset' property on text in CODING->object according to
6969    the annotation data at CHARBUF.  CHARBUF is an array:
6970      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6971  */
6972
6973 static inline void
6974 produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6975 {
6976   EMACS_INT from = pos - charbuf[2];
6977   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6978
6979   Fput_text_property (make_number (from), make_number (pos),
6980                       Qcharset, CHARSET_NAME (charset),
6981                       coding->dst_object);
6982 }
6983
6984
/* Number of `int' elements first requested for the work area that
   ALLOC_CONVERSION_WORK_AREA allocates.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work area CODING->charbuf with alloca and record its
   size, in elements, in CODING->charbuf_size.

   CHARBUF_SIZE elements are requested first; whenever alloca yields a
   null pointer the request is halved, as long as it stays above 1024
   elements.  If no area was obtained, CODING_RESULT_INSUFFICIENT_MEM
   is recorded and the macro executes `return CODING->result', which
   leaves the function that uses the macro.

   The macro must therefore be used directly in the body of a function
   whose return type accepts CODING->result, and, the memory coming
   from alloca, the work area must not be used after that function has
   returned.

   NOTE(review): alloca does not normally report failure by returning
   a null pointer, so the retry loop and the error path are probably
   never taken -- confirm for the supported platforms.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    coding->charbuf = NULL;                                             \
    while (size > 1024)                                                 \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * size);         \
        if (coding->charbuf)                                            \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = size;                                        \
  } while (0)
7006
7007
7008 static void
7009 produce_annotation (struct coding_system *coding, EMACS_INT pos)
7010 {
7011   int *charbuf = coding->charbuf;
7012   int *charbuf_end = charbuf + coding->charbuf_used;
7013
7014   if (NILP (coding->dst_object))
7015     return;
7016
7017   while (charbuf < charbuf_end)
7018     {
7019       if (*charbuf >= 0)
7020         pos++, charbuf++;
7021       else
7022         {
7023           int len = -*charbuf;
7024
7025           if (len > 2)
7026             switch (charbuf[1])
7027               {
7028               case CODING_ANNOTATE_COMPOSITION_MASK:
7029                 produce_composition (coding, charbuf, pos);
7030                 break;
7031               case CODING_ANNOTATE_CHARSET_MASK:
7032                 produce_charset (coding, charbuf, pos);
7033                 break;
7034               }
7035           charbuf += len;
7036         }
7037     }
7038 }
7039
7040 /* Decode the data at CODING->src_object into CODING->dst_object.
7041    CODING->src_object is a buffer, a string, or nil.
7042    CODING->dst_object is a buffer.
7043
7044    If CODING->src_object is a buffer, it must be the current buffer.
7045    In this case, if CODING->src_pos is positive, it is a position of
7046    the source text in the buffer, otherwise, the source text is in the
7047    gap area of the buffer, and CODING->src_pos specifies the offset of
7048    the text from GPT (which must be the same as PT).  If this is the
7049    same buffer as CODING->dst_object, CODING->src_pos must be
7050    negative.
7051
7052    If CODING->src_object is a string, CODING->src_pos is an index to
7053    that string.
7054
7055    If CODING->src_object is nil, CODING->source must already point to
7056    the non-relocatable memory area.  In this case, CODING->src_pos is
7057    an offset from CODING->source.
7058
7059    The decoded data is inserted at the current point of the buffer
7060    CODING->dst_object.
7061 */
7062
7063 static int
7064 decode_coding (struct coding_system *coding)
7065 {
7066   Lisp_Object attrs;
7067   Lisp_Object undo_list;
7068   Lisp_Object translation_table;
7069   struct ccl_spec cclspec;
7070   int carryover;
7071   int i;
7072
7073   if (BUFFERP (coding->src_object)
7074       && coding->src_pos > 0
7075       && coding->src_pos < GPT
7076       && coding->src_pos + coding->src_chars > GPT)
7077     move_gap_both (coding->src_pos, coding->src_pos_byte);
7078
7079   undo_list = Qt;
7080   if (BUFFERP (coding->dst_object))
7081     {
7082       if (current_buffer != XBUFFER (coding->dst_object))
7083         set_buffer_internal (XBUFFER (coding->dst_object));
7084       if (GPT != PT)
7085         move_gap_both (PT, PT_BYTE);
7086       undo_list = BVAR (current_buffer, undo_list);
7087       BVAR (current_buffer, undo_list) = Qt;
7088     }
7089
7090   coding->consumed = coding->consumed_char = 0;
7091   coding->produced = coding->produced_char = 0;
7092   coding->chars_at_source = 0;
7093   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7094   coding->errors = 0;
7095
7096   ALLOC_CONVERSION_WORK_AREA (coding);
7097
7098   attrs = CODING_ID_ATTRS (coding->id);
7099   translation_table = get_translation_table (attrs, 0, NULL);
7100
7101   carryover = 0;
7102   if (coding->decoder == decode_coding_ccl)
7103     {
7104       coding->spec.ccl = &cclspec;
7105       setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7106     }
7107   do
7108     {
7109       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7110
7111       coding_set_source (coding);
7112       coding->annotated = 0;
7113       coding->charbuf_used = carryover;
7114       (*(coding->decoder)) (coding);
7115       coding_set_destination (coding);
7116       carryover = produce_chars (coding, translation_table, 0);
7117       if (coding->annotated)
7118         produce_annotation (coding, pos);
7119       for (i = 0; i < carryover; i++)
7120         coding->charbuf[i]
7121           = coding->charbuf[coding->charbuf_used - carryover + i];
7122     }
7123   while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7124          || (coding->consumed < coding->src_bytes
7125              && (coding->result == CODING_RESULT_SUCCESS
7126                  || coding->result == CODING_RESULT_INVALID_SRC)));
7127
7128   if (carryover > 0)
7129     {
7130       coding_set_destination (coding);
7131       coding->charbuf_used = carryover;
7132       produce_chars (coding, translation_table, 1);
7133     }
7134
7135   coding->carryover_bytes = 0;
7136   if (coding->consumed < coding->src_bytes)
7137     {
7138       int nbytes = coding->src_bytes - coding->consumed;
7139       const unsigned char *src;
7140
7141       coding_set_source (coding);
7142       coding_set_destination (coding);
7143       src = coding->source + coding->consumed;
7144
7145       if (coding->mode & CODING_MODE_LAST_BLOCK)
7146         {
7147           /* Flush out unprocessed data as binary chars.  We are sure
7148              that the number of data is less than the size of
7149              coding->charbuf.  */
7150           coding->charbuf_used = 0;
7151           coding->chars_at_source = 0;
7152
7153           while (nbytes-- > 0)
7154             {
7155               int c = *src++;
7156
7157               if (c & 0x80)
7158                 c = BYTE8_TO_CHAR (c);
7159               coding->charbuf[coding->charbuf_used++] = c;
7160             }
7161           produce_chars (coding, Qnil, 1);
7162         }
7163       else
7164         {
7165           /* Record unprocessed bytes in coding->carryover.  We are
7166              sure that the number of data is less than the size of
7167              coding->carryover.  */
7168           unsigned char *p = coding->carryover;
7169
7170           if (nbytes > sizeof coding->carryover)
7171             nbytes = sizeof coding->carryover;
7172           coding->carryover_bytes = nbytes;
7173           while (nbytes-- > 0)
7174             *p++ = *src++;
7175         }
7176       coding->consumed = coding->src_bytes;
7177     }
7178
7179   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7180       && !inhibit_eol_conversion)
7181     decode_eol (coding);
7182   if (BUFFERP (coding->dst_object))
7183     {
7184       BVAR (current_buffer, undo_list) = undo_list;
7185       record_insert (coding->dst_pos, coding->produced_char);
7186     }
7187   return coding->result;
7188 }
7189
7190
7191 /* Extract an annotation datum from a composition starting at POS and
7192    ending before LIMIT of CODING->src_object (buffer or string), store
7193    the data in BUF, set *STOP to a starting position of the next
7194    composition (if any) or to LIMIT, and return the address of the
7195    next element of BUF.
7196
7197    If such an annotation is not found, set *STOP to a starting
7198    position of a composition after POS (if any) or to LIMIT, and
7199    return BUF.  */
7200
7201 static inline int *
7202 handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
7203                                struct coding_system *coding, int *buf,
7204                                EMACS_INT *stop)
7205 {
7206   EMACS_INT start, end;
7207   Lisp_Object prop;
7208
7209   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7210       || end > limit)
7211     *stop = limit;
7212   else if (start > pos)
7213     *stop = start;
7214   else
7215     {
7216       if (start == pos)
7217         {
7218           /* We found a composition.  Store the corresponding
7219              annotation data in BUF.  */
7220           int *head = buf;
7221           enum composition_method method = COMPOSITION_METHOD (prop);
7222           int nchars = COMPOSITION_LENGTH (prop);
7223
7224           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7225           if (method != COMPOSITION_RELATIVE)
7226             {
7227               Lisp_Object components;
7228               int len, i, i_byte;
7229
7230               components = COMPOSITION_COMPONENTS (prop);
7231               if (VECTORP (components))
7232                 {
7233                   len = ASIZE (components);
7234                   for (i = 0; i < len; i++)
7235                     *buf++ = XINT (AREF (components, i));
7236                 }
7237               else if (STRINGP (components))
7238                 {
7239                   len = SCHARS (components);
7240                   i = i_byte = 0;
7241                   while (i < len)
7242                     {
7243                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7244                       buf++;
7245                     }
7246                 }
7247               else if (INTEGERP (components))
7248                 {
7249                   len = 1;
7250                   *buf++ = XINT (components);
7251                 }
7252               else if (CONSP (components))
7253                 {
7254                   for (len = 0; CONSP (components);
7255                        len++, components = XCDR (components))
7256                     *buf++ = XINT (XCAR (components));
7257                 }
7258               else
7259                 abort ();
7260               *head -= len;
7261             }
7262         }
7263
7264       if (find_composition (end, limit, &start, &end, &prop,
7265                             coding->src_object)
7266           && end <= limit)
7267         *stop = start;
7268       else
7269         *stop = limit;
7270     }
7271   return buf;
7272 }
7273
7274
7275 /* Extract an annotation datum from a text property `charset' at POS of
7276    CODING->src_object (buffer of string), store the data in BUF, set
7277    *STOP to the position where the value of `charset' property changes
7278    (limiting by LIMIT), and return the address of the next element of
7279    BUF.
7280
7281    If the property value is nil, set *STOP to the position where the
7282    property value is non-nil (limiting by LIMIT), and return BUF.  */
7283
7284 static inline int *
7285 handle_charset_annotation (EMACS_INT pos, EMACS_INT limit,
7286                            struct coding_system *coding, int *buf,
7287                            EMACS_INT *stop)
7288 {
7289   Lisp_Object val, next;
7290   int id;
7291
7292   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7293   if (! NILP (val) && CHARSETP (val))
7294     id = XINT (CHARSET_SYMBOL_ID (val));
7295   else
7296     id = -1;
7297   ADD_CHARSET_DATA (buf, 0, id);
7298   next = Fnext_single_property_change (make_number (pos), Qcharset,
7299                                        coding->src_object,
7300                                        make_number (limit));
7301   *stop = XINT (next);
7302   return buf;
7303 }
7304
7305
7306 static void
7307 consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7308                int max_lookup)
7309 {
7310   int *buf = coding->charbuf;
7311   int *buf_end = coding->charbuf + coding->charbuf_size;
7312   const unsigned char *src = coding->source + coding->consumed;
7313   const unsigned char *src_end = coding->source + coding->src_bytes;
7314   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7315   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7316   int multibytep = coding->src_multibyte;
7317   Lisp_Object eol_type;
7318   int c;
7319   EMACS_INT stop, stop_composition, stop_charset;
7320   int *lookup_buf = NULL;
7321
7322   if (! NILP (translation_table))
7323     lookup_buf = alloca (sizeof (int) * max_lookup);
7324
7325   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7326   if (VECTORP (eol_type))
7327     eol_type = Qunix;
7328
7329   /* Note: composition handling is not yet implemented.  */
7330   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7331
7332   if (NILP (coding->src_object))
7333     stop = stop_composition = stop_charset = end_pos;
7334   else
7335     {
7336       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7337         stop = stop_composition = pos;
7338       else
7339         stop = stop_composition = end_pos;
7340       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7341         stop = stop_charset = pos;
7342       else
7343         stop_charset = end_pos;
7344     }
7345
7346   /* Compensate for CRLF and conversion.  */
7347   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7348   while (buf < buf_end)
7349     {
7350       Lisp_Object trans;
7351
7352       if (pos == stop)
7353         {
7354           if (pos == end_pos)
7355             break;
7356           if (pos == stop_composition)
7357             buf = handle_composition_annotation (pos, end_pos, coding,
7358                                                  buf, &stop_composition);
7359           if (pos == stop_charset)
7360             buf = handle_charset_annotation (pos, end_pos, coding,
7361                                              buf, &stop_charset);
7362           stop = (stop_composition < stop_charset
7363                   ? stop_composition : stop_charset);
7364         }
7365
7366       if (! multibytep)
7367         {
7368           EMACS_INT bytes;
7369
7370           if (coding->encoder == encode_coding_raw_text
7371               || coding->encoder == encode_coding_ccl)
7372             c = *src++, pos++;
7373           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7374             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7375           else
7376             c = BYTE8_TO_CHAR (*src), src++, pos++;
7377         }
7378       else
7379         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7380       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7381         c = '\n';
7382       if (! EQ (eol_type, Qunix))
7383         {
7384           if (c == '\n')
7385             {
7386               if (EQ (eol_type, Qdos))
7387                 *buf++ = '\r';
7388               else
7389                 c = '\r';
7390             }
7391         }
7392
7393       trans = Qnil;
7394       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7395       if (NILP (trans))
7396         *buf++ = c;
7397       else
7398         {
7399           int from_nchars = 1, to_nchars = 1;
7400           int *lookup_buf_end;
7401           const unsigned char *p = src;
7402           int i;
7403
7404           lookup_buf[0] = c;
7405           for (i = 1; i < max_lookup && p < src_end; i++)
7406             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7407           lookup_buf_end = lookup_buf + i;
7408           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7409           if (INTEGERP (trans))
7410             c = XINT (trans);
7411           else if (CONSP (trans))
7412             {
7413               from_nchars = ASIZE (XCAR (trans));
7414               trans = XCDR (trans);
7415               if (INTEGERP (trans))
7416                 c = XINT (trans);
7417               else
7418                 {
7419                   to_nchars = ASIZE (trans);
7420                   if (buf + to_nchars > buf_end)
7421                     break;
7422                   c = XINT (AREF (trans, 0));
7423                 }
7424             }
7425           else
7426             break;
7427           *buf++ = c;
7428           for (i = 1; i < to_nchars; i++)
7429             *buf++ = XINT (AREF (trans, i));
7430           for (i = 1; i < from_nchars; i++, pos++)
7431             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7432         }
7433     }
7434
7435   coding->consumed = src - coding->source;
7436   coding->consumed_char = pos - coding->src_pos;
7437   coding->charbuf_used = buf - coding->charbuf;
7438   coding->chars_at_source = 0;
7439 }
7440
7441
7442 /* Encode the text at CODING->src_object into CODING->dst_object.
7443    CODING->src_object is a buffer or a string.
7444    CODING->dst_object is a buffer or nil.
7445
7446    If CODING->src_object is a buffer, it must be the current buffer.
7447    In this case, if CODING->src_pos is positive, it is a position of
7448    the source text in the buffer, otherwise. the source text is in the
7449    gap area of the buffer, and coding->src_pos specifies the offset of
7450    the text from GPT (which must be the same as PT).  If this is the
7451    same buffer as CODING->dst_object, CODING->src_pos must be
7452    negative and CODING should not have `pre-write-conversion'.
7453
7454    If CODING->src_object is a string, CODING should not have
7455    `pre-write-conversion'.
7456
7457    If CODING->dst_object is a buffer, the encoded data is inserted at
7458    the current point of that buffer.
7459
7460    If CODING->dst_object is nil, the encoded data is placed at the
7461    memory area specified by CODING->destination.  */
7462
7463 static int
7464 encode_coding (struct coding_system *coding)
7465 {
7466   Lisp_Object attrs;
7467   Lisp_Object translation_table;
7468   int max_lookup;
7469   struct ccl_spec cclspec;
7470
7471   attrs = CODING_ID_ATTRS (coding->id);
7472   if (coding->encoder == encode_coding_raw_text)
7473     translation_table = Qnil, max_lookup = 0;
7474   else
7475     translation_table = get_translation_table (attrs, 1, &max_lookup);
7476
7477   if (BUFFERP (coding->dst_object))
7478     {
7479       set_buffer_internal (XBUFFER (coding->dst_object));
7480       coding->dst_multibyte
7481         = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7482     }
7483
7484   coding->consumed = coding->consumed_char = 0;
7485   coding->produced = coding->produced_char = 0;
7486   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7487   coding->errors = 0;
7488
7489   ALLOC_CONVERSION_WORK_AREA (coding);
7490
7491   if (coding->encoder == encode_coding_ccl)
7492     {
7493       coding->spec.ccl = &cclspec;
7494       setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7495     }
7496   do {
7497     coding_set_source (coding);
7498     consume_chars (coding, translation_table, max_lookup);
7499     coding_set_destination (coding);
7500     (*(coding->encoder)) (coding);
7501   } while (coding->consumed_char < coding->src_chars);
7502
7503   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7504     insert_from_gap (coding->produced_char, coding->produced);
7505
7506   return (coding->result);
7507 }
7508
7509
/* Name (or base name) of work buffer for code conversion.  */
static Lisp_Object Vcode_conversion_workbuf_name;

/* A working buffer used by the top level conversion.  Once it is
   created, it is never destroyed (make_conversion_work_buffer
   recreates it should it no longer be live).  It has the name
   Vcode_conversion_workbuf_name.  The other working buffers are
   destroyed after the use is finished, and their names are modified
   versions of Vcode_conversion_workbuf_name.  */
static Lisp_Object Vcode_conversion_reused_workbuf;

/* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
   make_conversion_work_buffer increments this on every request, so
   the value may exceed 1 while conversions are nested;
   code_conversion_restore resets it to 0 when the reused buffer is
   released.  */
static int reused_workbuf_in_use;
7522
7523
7524 /* Return a working buffer of code conversion.  MULTIBYTE specifies the
7525    multibyteness of returning buffer.  */
7526
7527 static Lisp_Object
7528 make_conversion_work_buffer (int multibyte)
7529 {
7530   Lisp_Object name, workbuf;
7531   struct buffer *current;
7532
7533   if (reused_workbuf_in_use++)
7534     {
7535       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7536       workbuf = Fget_buffer_create (name);
7537     }
7538   else
7539     {
7540       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7541         Vcode_conversion_reused_workbuf
7542           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7543       workbuf = Vcode_conversion_reused_workbuf;
7544     }
7545   current = current_buffer;
7546   set_buffer_internal (XBUFFER (workbuf));
7547   /* We can't allow modification hooks to run in the work buffer.  For
7548      instance, directory_files_internal assumes that file decoding
7549      doesn't compile new regexps.  */
7550   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7551   Ferase_buffer ();
7552   BVAR (current_buffer, undo_list) = Qt;
7553   BVAR (current_buffer, enable_multibyte_characters) = multibyte ? Qt : Qnil;
7554   set_buffer_internal (current);
7555   return workbuf;
7556 }
7557
7558
7559 static Lisp_Object
7560 code_conversion_restore (Lisp_Object arg)
7561 {
7562   Lisp_Object current, workbuf;
7563   struct gcpro gcpro1;
7564
7565   GCPRO1 (arg);
7566   current = XCAR (arg);
7567   workbuf = XCDR (arg);
7568   if (! NILP (workbuf))
7569     {
7570       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7571         reused_workbuf_in_use = 0;
7572       else if (! NILP (Fbuffer_live_p (workbuf)))
7573         Fkill_buffer (workbuf);
7574     }
7575   set_buffer_internal (XBUFFER (current));
7576   UNGCPRO;
7577   return Qnil;
7578 }
7579
7580 Lisp_Object
7581 code_conversion_save (int with_work_buf, int multibyte)
7582 {
7583   Lisp_Object workbuf = Qnil;
7584
7585   if (with_work_buf)
7586     workbuf = make_conversion_work_buffer (multibyte);
7587   record_unwind_protect (code_conversion_restore,
7588                          Fcons (Fcurrent_buffer (), workbuf));
7589   return workbuf;
7590 }
7591
7592 int
7593 decode_coding_gap (struct coding_system *coding,
7594                    EMACS_INT chars, EMACS_INT bytes)
7595 {
7596   int count = SPECPDL_INDEX ();
7597   Lisp_Object attrs;
7598
7599   code_conversion_save (0, 0);
7600
7601   coding->src_object = Fcurrent_buffer ();
7602   coding->src_chars = chars;
7603   coding->src_bytes = bytes;
7604   coding->src_pos = -chars;
7605   coding->src_pos_byte = -bytes;
7606   coding->src_multibyte = chars < bytes;
7607   coding->dst_object = coding->src_object;
7608   coding->dst_pos = PT;
7609   coding->dst_pos_byte = PT_BYTE;
7610   coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
7611
7612   if (CODING_REQUIRE_DETECTION (coding))
7613     detect_coding (coding);
7614
7615   coding->mode |= CODING_MODE_LAST_BLOCK;
7616   current_buffer->text->inhibit_shrinking = 1;
7617   decode_coding (coding);
7618   current_buffer->text->inhibit_shrinking = 0;
7619
7620   attrs = CODING_ID_ATTRS (coding->id);
7621   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7622     {
7623       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7624       Lisp_Object val;
7625
7626       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7627       val = call1 (CODING_ATTR_POST_READ (attrs),
7628                    make_number (coding->produced_char));
7629       CHECK_NATNUM (val);
7630       coding->produced_char += Z - prev_Z;
7631       coding->produced += Z_BYTE - prev_Z_BYTE;
7632     }
7633
7634   unbind_to (count, Qnil);
7635   return coding->result;
7636 }
7637
7638
7639 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7640    SRC_OBJECT into DST_OBJECT by coding context CODING.
7641
7642    SRC_OBJECT is a buffer, a string, or Qnil.
7643
7644    If it is a buffer, the text is at point of the buffer.  FROM and TO
7645    are positions in the buffer.
7646
7647    If it is a string, the text is at the beginning of the string.
7648    FROM and TO are indices to the string.
7649
7650    If it is nil, the text is at coding->source.  FROM and TO are
7651    indices to coding->source.
7652
7653    DST_OBJECT is a buffer, Qt, or Qnil.
7654
7655    If it is a buffer, the decoded text is inserted at point of the
7656    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7657    is deleted.
7658
7659    If it is Qt, a string is made from the decoded text, and
7660    set in CODING->dst_object.
7661
7662    If it is Qnil, the decoded text is stored at CODING->destination.
7663    The caller must allocate CODING->dst_bytes bytes at
7664    CODING->destination by xmalloc.  If the decoded text is longer than
7665    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7666  */
7667
7668 void
7669 decode_coding_object (struct coding_system *coding,
7670                       Lisp_Object src_object,
7671                       EMACS_INT from, EMACS_INT from_byte,
7672                       EMACS_INT to, EMACS_INT to_byte,
7673                       Lisp_Object dst_object)
7674 {
7675   int count = SPECPDL_INDEX ();
7676   unsigned char *destination IF_LINT (= NULL);
7677   EMACS_INT dst_bytes IF_LINT (= 0);
7678   EMACS_INT chars = to - from;
7679   EMACS_INT bytes = to_byte - from_byte;
7680   Lisp_Object attrs;
7681   int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7682   int need_marker_adjustment = 0;
7683   Lisp_Object old_deactivate_mark;
7684
7685   old_deactivate_mark = Vdeactivate_mark;
7686
7687   if (NILP (dst_object))
7688     {
7689       destination = coding->destination;
7690       dst_bytes = coding->dst_bytes;
7691     }
7692
7693   coding->src_object = src_object;
7694   coding->src_chars = chars;
7695   coding->src_bytes = bytes;
7696   coding->src_multibyte = chars < bytes;
7697
7698   if (STRINGP (src_object))
7699     {
7700       coding->src_pos = from;
7701       coding->src_pos_byte = from_byte;
7702     }
7703   else if (BUFFERP (src_object))
7704     {
7705       set_buffer_internal (XBUFFER (src_object));
7706       if (from != GPT)
7707         move_gap_both (from, from_byte);
7708       if (EQ (src_object, dst_object))
7709         {
7710           struct Lisp_Marker *tail;
7711
7712           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7713             {
7714               tail->need_adjustment
7715                 = tail->charpos == (tail->insertion_type ? from : to);
7716               need_marker_adjustment |= tail->need_adjustment;
7717             }
7718           saved_pt = PT, saved_pt_byte = PT_BYTE;
7719           TEMP_SET_PT_BOTH (from, from_byte);
7720           current_buffer->text->inhibit_shrinking = 1;
7721           del_range_both (from, from_byte, to, to_byte, 1);
7722           coding->src_pos = -chars;
7723           coding->src_pos_byte = -bytes;
7724         }
7725       else
7726         {
7727           coding->src_pos = from;
7728           coding->src_pos_byte = from_byte;
7729         }
7730     }
7731
7732   if (CODING_REQUIRE_DETECTION (coding))
7733     detect_coding (coding);
7734   attrs = CODING_ID_ATTRS (coding->id);
7735
7736   if (EQ (dst_object, Qt)
7737       || (! NILP (CODING_ATTR_POST_READ (attrs))
7738           && NILP (dst_object)))
7739     {
7740       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7741       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7742       coding->dst_pos = BEG;
7743       coding->dst_pos_byte = BEG_BYTE;
7744     }
7745   else if (BUFFERP (dst_object))
7746     {
7747       code_conversion_save (0, 0);
7748       coding->dst_object = dst_object;
7749       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7750       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7751       coding->dst_multibyte
7752         = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
7753     }
7754   else
7755     {
7756       code_conversion_save (0, 0);
7757       coding->dst_object = Qnil;
7758       /* Most callers presume this will return a multibyte result, and they
7759          won't use `binary' or `raw-text' anyway, so let's not worry about
7760          CODING_FOR_UNIBYTE.  */
7761       coding->dst_multibyte = 1;
7762     }
7763
7764   decode_coding (coding);
7765
7766   if (BUFFERP (coding->dst_object))
7767     set_buffer_internal (XBUFFER (coding->dst_object));
7768
7769   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7770     {
7771       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7772       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7773       Lisp_Object val;
7774
7775       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7776       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7777               old_deactivate_mark);
7778       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7779                         make_number (coding->produced_char));
7780       UNGCPRO;
7781       CHECK_NATNUM (val);
7782       coding->produced_char += Z - prev_Z;
7783       coding->produced += Z_BYTE - prev_Z_BYTE;
7784     }
7785
7786   if (EQ (dst_object, Qt))
7787     {
7788       coding->dst_object = Fbuffer_string ();
7789     }
7790   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7791     {
7792       set_buffer_internal (XBUFFER (coding->dst_object));
7793       if (dst_bytes < coding->produced)
7794         {
7795           destination = xrealloc (destination, coding->produced);
7796           if (! destination)
7797             {
7798               record_conversion_result (coding,
7799                                         CODING_RESULT_INSUFFICIENT_MEM);
7800               unbind_to (count, Qnil);
7801               return;
7802             }
7803           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7804             move_gap_both (BEGV, BEGV_BYTE);
7805           memcpy (destination, BEGV_ADDR, coding->produced);
7806           coding->destination = destination;
7807         }
7808     }
7809
7810   if (saved_pt >= 0)
7811     {
7812       /* This is the case of:
7813          (BUFFERP (src_object) && EQ (src_object, dst_object))
7814          As we have moved PT while replacing the original buffer
7815          contents, we must recover it now.  */
7816       set_buffer_internal (XBUFFER (src_object));
7817       current_buffer->text->inhibit_shrinking = 0;
7818       if (saved_pt < from)
7819         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7820       else if (saved_pt < from + chars)
7821         TEMP_SET_PT_BOTH (from, from_byte);
7822       else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
7823         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7824                           saved_pt_byte + (coding->produced - bytes));
7825       else
7826         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7827                           saved_pt_byte + (coding->produced - bytes));
7828
7829       if (need_marker_adjustment)
7830         {
7831           struct Lisp_Marker *tail;
7832
7833           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7834             if (tail->need_adjustment)
7835               {
7836                 tail->need_adjustment = 0;
7837                 if (tail->insertion_type)
7838                   {
7839                     tail->bytepos = from_byte;
7840                     tail->charpos = from;
7841                   }
7842                 else
7843                   {
7844                     tail->bytepos = from_byte + coding->produced;
7845                     tail->charpos
7846                       = (NILP (BVAR (current_buffer, enable_multibyte_characters))
7847                          ? tail->bytepos : from + coding->produced_char);
7848                   }
7849               }
7850         }
7851     }
7852
7853   Vdeactivate_mark = old_deactivate_mark;
7854   unbind_to (count, coding->dst_object);
7855 }
7856
7857
7858 void
7859 encode_coding_object (struct coding_system *coding,
7860                       Lisp_Object src_object,
7861                       EMACS_INT from, EMACS_INT from_byte,
7862                       EMACS_INT to, EMACS_INT to_byte,
7863                       Lisp_Object dst_object)
7864 {
7865   int count = SPECPDL_INDEX ();
7866   EMACS_INT chars = to - from;
7867   EMACS_INT bytes = to_byte - from_byte;
7868   Lisp_Object attrs;
7869   int saved_pt = -1, saved_pt_byte IF_LINT (= 0);
7870   int need_marker_adjustment = 0;
7871   int kill_src_buffer = 0;
7872   Lisp_Object old_deactivate_mark;
7873
7874   old_deactivate_mark = Vdeactivate_mark;
7875
7876   coding->src_object = src_object;
7877   coding->src_chars = chars;
7878   coding->src_bytes = bytes;
7879   coding->src_multibyte = chars < bytes;
7880
7881   attrs = CODING_ID_ATTRS (coding->id);
7882
7883   if (EQ (src_object, dst_object))
7884     {
7885       struct Lisp_Marker *tail;
7886
7887       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7888         {
7889           tail->need_adjustment
7890             = tail->charpos == (tail->insertion_type ? from : to);
7891           need_marker_adjustment |= tail->need_adjustment;
7892         }
7893     }
7894
7895   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7896     {
7897       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7898       set_buffer_internal (XBUFFER (coding->src_object));
7899       if (STRINGP (src_object))
7900         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7901       else if (BUFFERP (src_object))
7902         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7903       else
7904         insert_1_both ((char *) coding->source + from, chars, bytes, 0, 0, 0);
7905
7906       if (EQ (src_object, dst_object))
7907         {
7908           set_buffer_internal (XBUFFER (src_object));
7909           saved_pt = PT, saved_pt_byte = PT_BYTE;
7910           del_range_both (from, from_byte, to, to_byte, 1);
7911           set_buffer_internal (XBUFFER (coding->src_object));
7912         }
7913
7914       {
7915         Lisp_Object args[3];
7916         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7917
7918         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7919                 old_deactivate_mark);
7920         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7921         args[1] = make_number (BEG);
7922         args[2] = make_number (Z);
7923         safe_call (3, args);
7924         UNGCPRO;
7925       }
7926       if (XBUFFER (coding->src_object) != current_buffer)
7927         kill_src_buffer = 1;
7928       coding->src_object = Fcurrent_buffer ();
7929       if (BEG != GPT)
7930         move_gap_both (BEG, BEG_BYTE);
7931       coding->src_chars = Z - BEG;
7932       coding->src_bytes = Z_BYTE - BEG_BYTE;
7933       coding->src_pos = BEG;
7934       coding->src_pos_byte = BEG_BYTE;
7935       coding->src_multibyte = Z < Z_BYTE;
7936     }
7937   else if (STRINGP (src_object))
7938     {
7939       code_conversion_save (0, 0);
7940       coding->src_pos = from;
7941       coding->src_pos_byte = from_byte;
7942     }
7943   else if (BUFFERP (src_object))
7944     {
7945       code_conversion_save (0, 0);
7946       set_buffer_internal (XBUFFER (src_object));
7947       if (EQ (src_object, dst_object))
7948         {
7949           saved_pt = PT, saved_pt_byte = PT_BYTE;
7950           coding->src_object = del_range_1 (from, to, 1, 1);
7951           coding->src_pos = 0;
7952           coding->src_pos_byte = 0;
7953         }
7954       else
7955         {
7956           if (from < GPT && to >= GPT)
7957             move_gap_both (from, from_byte);
7958           coding->src_pos = from;
7959           coding->src_pos_byte = from_byte;
7960         }
7961     }
7962   else
7963     code_conversion_save (0, 0);
7964
7965   if (BUFFERP (dst_object))
7966     {
7967       coding->dst_object = dst_object;
7968       if (EQ (src_object, dst_object))
7969         {
7970           coding->dst_pos = from;
7971           coding->dst_pos_byte = from_byte;
7972         }
7973       else
7974         {
7975           struct buffer *current = current_buffer;
7976
7977           set_buffer_temp (XBUFFER (dst_object));
7978           coding->dst_pos = PT;
7979           coding->dst_pos_byte = PT_BYTE;
7980           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7981           set_buffer_temp (current);
7982         }
7983       coding->dst_multibyte
7984         = ! NILP (BVAR (XBUFFER (dst_object), enable_multibyte_characters));
7985     }
7986   else if (EQ (dst_object, Qt))
7987     {
7988       ptrdiff_t dst_bytes = max (1, coding->src_chars);
7989       coding->dst_object = Qnil;
7990       coding->destination = (unsigned char *) xmalloc (dst_bytes);
7991       coding->dst_bytes = dst_bytes;
7992       coding->dst_multibyte = 0;
7993     }
7994   else
7995     {
7996       coding->dst_object = Qnil;
7997       coding->dst_multibyte = 0;
7998     }
7999
8000   encode_coding (coding);
8001
8002   if (EQ (dst_object, Qt))
8003     {
8004       if (BUFFERP (coding->dst_object))
8005         coding->dst_object = Fbuffer_string ();
8006       else
8007         {
8008           coding->dst_object
8009             = make_unibyte_string ((char *) coding->destination,
8010                                    coding->produced);
8011           xfree (coding->destination);
8012         }
8013     }
8014
8015   if (saved_pt >= 0)
8016     {
8017       /* This is the case of:
8018          (BUFFERP (src_object) && EQ (src_object, dst_object))
8019          As we have moved PT while replacing the original buffer
8020          contents, we must recover it now.  */
8021       set_buffer_internal (XBUFFER (src_object));
8022       if (saved_pt < from)
8023         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8024       else if (saved_pt < from + chars)
8025         TEMP_SET_PT_BOTH (from, from_byte);
8026       else if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
8027         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8028                           saved_pt_byte + (coding->produced - bytes));
8029       else
8030         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8031                           saved_pt_byte + (coding->produced - bytes));
8032
8033       if (need_marker_adjustment)
8034         {
8035           struct Lisp_Marker *tail;
8036
8037           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8038             if (tail->need_adjustment)
8039               {
8040                 tail->need_adjustment = 0;
8041                 if (tail->insertion_type)
8042                   {
8043                     tail->bytepos = from_byte;
8044                     tail->charpos = from;
8045                   }
8046                 else
8047                   {
8048                     tail->bytepos = from_byte + coding->produced;
8049                     tail->charpos
8050                       = (NILP (BVAR (current_buffer, enable_multibyte_characters))
8051                          ? tail->bytepos : from + coding->produced_char);
8052                   }
8053               }
8054         }
8055     }
8056
8057   if (kill_src_buffer)
8058     Fkill_buffer (coding->src_object);
8059
8060   Vdeactivate_mark = old_deactivate_mark;
8061   unbind_to (count, Qnil);
8062 }
8063
8064
8065 Lisp_Object
8066 preferred_coding_system (void)
8067 {
8068   int id = coding_categories[coding_priorities[0]].id;
8069
8070   return CODING_ID_NAME (id);
8071 }
8072
8073 \f
8074 #ifdef emacs
8075 /*** 11. Emacs Lisp library functions ***/
8076
8077 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8078        doc: /* Return t if OBJECT is nil or a coding-system.
8079 See the documentation of `define-coding-system' for information
8080 about coding-system objects.  */)
8081   (Lisp_Object object)
8082 {
8083   /* nil, or anything with a non-negative coding system id, qualifies.  */
8084   if (NILP (object) || CODING_SYSTEM_ID (object) >= 0)
8085     return Qt;
8086   /* Otherwise accept only a symbol that has a non-nil
8087      Qcoding_system_define_form property.  */
8088   return ((SYMBOLP (object)
8089            && ! NILP (Fget (object, Qcoding_system_define_form)))
8090           ? Qt : Qnil);
     }
8091
8092 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8093        Sread_non_nil_coding_system, 1, 1, 0,
8094        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8095   (Lisp_Object prompt)
8096 {
8097   Lisp_Object input;
       /* Ask again until the answer is a non-empty string.  */
8098   for (;;)
8099     {
8100       input = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, Qt, Qnil,
8101                                 Qcoding_system_history, Qnil, Qnil);
8102       if (SCHARS (input) != 0)
8103         return Fintern (input, Qnil);
8104     }
8105 }
8106
8107 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8108        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8109 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8110 Ignores case when completing coding systems (all Emacs coding systems
8111 are lower-case).  */)
8112   (Lisp_Object prompt, Lisp_Object default_coding_system)
8113 {
8114   int specpdl_mark = SPECPDL_INDEX ();
8115   Lisp_Object default_name = default_coding_system;
8116   Lisp_Object answer;
8117
       /* Fcompleting_read is handed the default as a string.  */
8118   if (SYMBOLP (default_name))
8119     default_name = SYMBOL_NAME (default_name);
       /* Bind completion-ignore-case to t for the duration of the read.  */
8120   specbind (Qcompletion_ignore_case, Qt);
8121   answer = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, Qt, Qnil,
8122                              Qcoding_system_history, default_name, Qnil);
8123   unbind_to (specpdl_mark, Qnil);
8124   return SCHARS (answer) != 0 ? Fintern (answer, Qnil) : Qnil;
8125 }
8126
8127 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8128        1, 1, 0,
8129        doc: /* Check validity of CODING-SYSTEM.
8130 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8131 It is valid if it is nil or a symbol defined as a coding system by the
8132 function `define-coding-system'.  */)
8133   (Lisp_Object coding_system)
8134 {
8135   Lisp_Object pending = Fget (coding_system, Qcoding_system_define_form);
8136
8137   if (! NILP (pending))
8138     {
8139       /* Clear the property first, then evaluate the saved form.  */
8140       Fput (coding_system, Qcoding_system_define_form, Qnil);
8141       safe_eval (pending);
8142     }
8143   if (NILP (Fcoding_system_p (coding_system)))
8144     xsignal1 (Qcoding_system_error, coding_system);
8145   return coding_system;
8146 }
8147
8148 \f
8149 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8150    HIGHEST is nonzero, return the coding system of the highest
8151    priority among the detected coding systems.  Otherwise return a
8152    list of detected coding systems sorted by their priorities.  If
8153    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8154    multibyte form but contain only ASCII and eight-bit chars.
8155    Otherwise, the bytes are raw bytes.
8156
8157    CODING-SYSTEM controls the detection as below:
8158
8159    If it is nil, detect both text-format and eol-format.  If the
8160    text-format part of CODING-SYSTEM is already specified
8161    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8162    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8163    detect only text-format.  */
8164
8165 Lisp_Object
8166 detect_coding_system (const unsigned char *src,
8167                       EMACS_INT src_chars, EMACS_INT src_bytes,
8168                       int highest, int multibytep,
8169                       Lisp_Object coding_system)
8170 {
8171   const unsigned char *src_end = src + src_bytes;
8172   Lisp_Object attrs, eol_type;
8173   Lisp_Object val = Qnil;
8174   struct coding_system coding;
8175   ptrdiff_t id;
8176   struct coding_detection_info detect_info;
8177   enum coding_category base_category;
8178   int null_byte_found = 0, eight_bit_found = 0;
8179
8180   if (NILP (coding_system))
8181     coding_system = Qundecided;
8182   setup_coding_system (coding_system, &coding);
8183   attrs = CODING_ID_ATTRS (coding.id);
8184   eol_type = CODING_ID_EOL_TYPE (coding.id);
8185   coding_system = CODING_ATTR_BASE_NAME (attrs);
8186
8187   coding.source = src;
8188   coding.src_chars = src_chars;
8189   coding.src_bytes = src_bytes;
8190   coding.src_multibyte = multibytep;
8191   coding.consumed = 0;
8192   coding.mode |= CODING_MODE_LAST_BLOCK;
8193   coding.head_ascii = 0;
8194
8195   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8196
8197   /* At first, detect text-format if necessary.  */
8198   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8199   if (base_category == coding_category_undecided)
8200     {
8201       enum coding_category category IF_LINT (= 0);
8202       struct coding_system *this IF_LINT (= NULL);
8203       int c, i;
8204
8205       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
           /* While no eight-bit byte has been seen, coding.head_ascii
              counts the bytes passed over.  The scan stops early once
              both a null byte and an eight-bit byte have been seen, or
              when the ISO-2022 detector reports a result.  */
8206       for (; src < src_end; src++)
8207         {
8208           c = *src;
8209           if (c & 0x80)
8210             {
8211               eight_bit_found = 1;
8212               if (null_byte_found)
8213                 break;
8214             }
8215           else if (c < 0x20)
8216             {
8217               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8218                   && ! inhibit_iso_escape_detection
8219                   && ! detect_info.checked)
8220                 {
8221                   if (detect_coding_iso_2022 (&coding, &detect_info))
8222                     {
8223                       /* We have scanned the whole data.  */
8224                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8225                         {
8226                           /* We didn't find an 8-bit code.  We may
8227                              have found a null-byte, but it's very
8228                              rare that a binary file conforms to
8229                              ISO-2022.  */
8230                           src = src_end;
8231                           coding.head_ascii = src - coding.source;
8232                         }
8233                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8234                       break;
8235                     }
8236                 }
8237               else if (! c && !inhibit_null_byte_detection)
8238                 {
8239                   null_byte_found = 1;
8240                   if (eight_bit_found)
8241                     break;
8242                 }
8243               if (! eight_bit_found)
8244                 coding.head_ascii++;
8245             }
8246           else if (! eight_bit_found)
8247             coding.head_ascii++;
8248         }
8249
8250       if (null_byte_found || eight_bit_found
8251           || coding.head_ascii < coding.src_bytes
8252           || detect_info.found)
8253         {
8254           if (coding.head_ascii == coding.src_bytes)
8255             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
8256             for (i = 0; i < coding_category_raw_text; i++)
8257               {
8258                 category = coding_priorities[i];
8259                 this = coding_categories + category;
8260                 if (detect_info.found & (1 << category))
8261                   break;
8262               }
8263           else
8264             {
8265               if (null_byte_found)
8266                 {
                       /* With a null byte present, mark every category
                          except the UTF-16 ones as checked and rejected,
                          so only the UTF-16 detectors run below.  */
8267                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8268                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8269                 }
                   /* Try the categories in priority order, calling the
                      detector of each category not yet checked.  When
                      HIGHEST is nonzero, stop at the first category
                      that is found.  */
8270               for (i = 0; i < coding_category_raw_text; i++)
8271                 {
8272                   category = coding_priorities[i];
8273                   this = coding_categories + category;
8274
8275                   if (this->id < 0)
8276                     {
8277                       /* No coding system of this category is defined.  */
8278                       detect_info.rejected |= (1 << category);
8279                     }
8280                   else if (category >= coding_category_raw_text)
8281                     continue;
8282                   else if (detect_info.checked & (1 << category))
8283                     {
8284                       if (highest
8285                           && (detect_info.found & (1 << category)))
8286                         break;
8287                     }
8288                   else if ((*(this->detector)) (&coding, &detect_info)
8289                            && highest
8290                            && (detect_info.found & (1 << category)))
8291                     {
8292                       if (category == coding_category_utf_16_auto)
8293                         {
8294                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8295                             category = coding_category_utf_16_le;
8296                           else
8297                             category = coding_category_utf_16_be;
8298                         }
8299                       break;
8300                     }
8301                 }
8302             }
8303         }
8304
           /* Turn the detection masks into VAL, a list of coding
              system ids (converted to names further below).  */
8305       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8306           || null_byte_found)
8307         {
               /* Every category was rejected, or a null byte was seen:
                  answer with the id of Qno_conversion.  */
8308           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8309           id = CODING_SYSTEM_ID (Qno_conversion);
8310           val = Fcons (make_number (id), Qnil);
8311         }
8312       else if (! detect_info.rejected && ! detect_info.found)
8313         {
               /* Nothing was found and nothing was rejected: answer with
                  the coding system of the `undecided' category.  */
8314           detect_info.found = CATEGORY_MASK_ANY;
8315           id = coding_categories[coding_category_undecided].id;
8316           val = Fcons (make_number (id), Qnil);
8317         }
8318       else if (highest)
8319         {
               /* Only one candidate is wanted: the category at which the
                  loops above stopped if something was found, else the
                  first category in priority order that was not
                  rejected.  */
8320           if (detect_info.found)
8321             {
8322               detect_info.found = 1 << category;
8323               val = Fcons (make_number (this->id), Qnil);
8324             }
8325           else
8326             for (i = 0; i < coding_category_raw_text; i++)
8327               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8328                 {
8329                   detect_info.found = 1 << coding_priorities[i];
8330                   id = coding_categories[coding_priorities[i]].id;
8331                   val = Fcons (make_number (id), Qnil);
8332                   break;
8333                 }
8334         }
8335       else
8336         {
               /* Build the whole list.  Both loops walk the priorities
                  backwards while consing onto the front, so each group
                  ends up in priority order: the categories that were
                  found come first, followed by those neither found nor
                  rejected.  */
8337           int mask = detect_info.rejected | detect_info.found;
8338           int found = 0;
8339
8340           for (i = coding_category_raw_text - 1; i >= 0; i--)
8341             {
8342               category = coding_priorities[i];
8343               if (! (mask & (1 << category)))
8344                 {
8345                   found |= 1 << category;
8346                   id = coding_categories[category].id;
8347                   if (id >= 0)
8348                     val = Fcons (make_number (id), val);
8349                 }
8350             }
8351           for (i = coding_category_raw_text - 1; i >= 0; i--)
8352             {
8353               category = coding_priorities[i];
8354               if (detect_info.found & (1 << category))
8355                 {
8356                   id = coding_categories[category].id;
8357                   val = Fcons (make_number (id), val);
8358                 }
8359             }
8360           detect_info.found |= found;
8361         }
8362     }
8363   else if (base_category == coding_category_utf_8_auto)
8364     {
           /* Only decide between the with-signature and
              without-signature UTF-8 categories.  */
8365       if (detect_coding_utf_8 (&coding, &detect_info))
8366         {
8367           struct coding_system *this;
8368
8369           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8370             this = coding_categories + coding_category_utf_8_sig;
8371           else
8372             this = coding_categories + coding_category_utf_8_nosig;
8373           val = Fcons (make_number (this->id), Qnil);
8374         }
8375     }
8376   else if (base_category == coding_category_utf_16_auto)
8377     {
           /* Only decide among the UTF-16 categories.  */
8378       if (detect_coding_utf_16 (&coding, &detect_info))
8379         {
8380           struct coding_system *this;
8381
8382           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8383             this = coding_categories + coding_category_utf_16_le;
8384           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8385             this = coding_categories + coding_category_utf_16_be;
8386           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8387             this = coding_categories + coding_category_utf_16_be_nosig;
8388           else
8389             this = coding_categories + coding_category_utf_16_le_nosig;
8390           val = Fcons (make_number (this->id), Qnil);
8391         }
8392     }
8393   else
8394     {
           /* The text-format is already specified: use CODING's own id.  */
8395       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8396       val = Fcons (make_number (coding.id), Qnil);
8397     }
8398
8399   /* Then, detect eol-format if necessary.  */
8400   {
8401     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8402     Lisp_Object tail;
8403
         /* A vector EOL_TYPE means the eol-format is not fixed yet, so
            it is detected separately for non-UTF-16, UTF-16 big-endian
            and UTF-16 little-endian candidates.  Otherwise the eol
            format named by EOL_TYPE is used for all of them.  */
8404     if (VECTORP (eol_type))
8405       {
8406         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8407           {
8408             if (null_byte_found)
8409               normal_eol = EOL_SEEN_LF;
8410             else
8411               normal_eol = detect_eol (coding.source, src_bytes,
8412                                        coding_category_raw_text);
8413           }
8414         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8415                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8416           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8417                                       coding_category_utf_16_be);
8418         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8419                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8420           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8421                                       coding_category_utf_16_le);
8422       }
8423     else
8424       {
8425         if (EQ (eol_type, Qunix))
8426           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8427         else if (EQ (eol_type, Qdos))
8428           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8429         else
8430           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8431       }
8432
         /* Replace each coding system id in VAL: when the candidate's
            own eol-type is a vector and an eol format was detected, by
            the matching element of that vector (0 for LF, 1 for CRLF,
            2 for CR); otherwise by the coding system's name.  */
8433     for (tail = val; CONSP (tail); tail = XCDR (tail))
8434       {
8435         enum coding_category category;
8436         int this_eol;
8437
8438         id = XINT (XCAR (tail));
8439         attrs = CODING_ID_ATTRS (id);
8440         category = XINT (CODING_ATTR_CATEGORY (attrs));
8441         eol_type = CODING_ID_EOL_TYPE (id);
8442         if (VECTORP (eol_type))
8443           {
8444             if (category == coding_category_utf_16_be
8445                 || category == coding_category_utf_16_be_nosig)
8446               this_eol = utf_16_be_eol;
8447             else if (category == coding_category_utf_16_le
8448                      || category == coding_category_utf_16_le_nosig)
8449               this_eol = utf_16_le_eol;
8450             else
8451               this_eol = normal_eol;
8452
8453             if (this_eol == EOL_SEEN_LF)
8454               XSETCAR (tail, AREF (eol_type, 0));
8455             else if (this_eol == EOL_SEEN_CRLF)
8456               XSETCAR (tail, AREF (eol_type, 1));
8457             else if (this_eol == EOL_SEEN_CR)
8458               XSETCAR (tail, AREF (eol_type, 2));
8459             else
8460               XSETCAR (tail, CODING_ID_NAME (id));
8461           }
8462         else
8463           XSETCAR (tail, CODING_ID_NAME (id));
8464       }
8465   }
8466
       /* With HIGHEST, return just the first element (or nil if the
          list is empty); otherwise return the whole list.  */
8467   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8468 }
8469
8470
8471 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8472        2, 3, 0,
8473        doc: /* Detect coding system of the text in the region between START and END.
8474 Return a list of possible coding systems ordered by priority.
8475 The coding systems to try and their priorities follows what
8476 the function `coding-system-priority-list' (which see) returns.
8477
8478 If only ASCII characters are found (except for such ISO-2022 control
8479 characters as ESC), it returns a list of single element `undecided'
8480 or its subsidiary coding system according to a detected end-of-line
8481 format.
8482
8483 If optional argument HIGHEST is non-nil, return the coding system of
8484 highest priority.  */)
8485   (Lisp_Object start, Lisp_Object end, Lisp_Object highest)
8486 {
       /* Buffer positions are kept as EMACS_INT, the type that
          detect_coding_system takes for its size arguments; a plain
          `int' would truncate positions above INT_MAX.  */
8487   EMACS_INT from, to;
8488   EMACS_INT from_byte, to_byte;
8489
8490   CHECK_NUMBER_COERCE_MARKER (start);
8491   CHECK_NUMBER_COERCE_MARKER (end);
8492
8493   validate_region (&start, &end);
8494   from = XINT (start), to = XINT (end);
8495   from_byte = CHAR_TO_BYTE (from);
8496   to_byte = CHAR_TO_BYTE (to);
8497
       /* The region is handed over as one run of bytes starting at
          BYTE_POS_ADDR (from_byte), so when the gap lies inside the
          region move it to the region's end first.  */
8498   if (from < GPT && to >= GPT)
8499     move_gap_both (to, to_byte);
8500
8501   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8502                                to - from, to_byte - from_byte,
8503                                !NILP (highest),
8504                                !NILP (BVAR (current_buffer
8505                                       , enable_multibyte_characters)),
8506                                Qnil);
8507 }
8508
8509 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8510        1, 2, 0,
8511        doc: /* Detect coding system of the text in STRING.
8512 Return a list of possible coding systems ordered by priority.
8513 The coding systems to try and their priorities follows what
8514 the function `coding-system-priority-list' (which see) returns.
8515
8516 If only ASCII characters are found (except for such ISO-2022 control
8517 characters as ESC), it returns a list of single element `undecided'
8518 or its subsidiary coding system according to a detected end-of-line
8519 format.
8520
8521 If optional argument HIGHEST is non-nil, return the coding system of
8522 highest priority.  */)
8523   (Lisp_Object string, Lisp_Object highest)
8524 {
8525   int want_highest = ! NILP (highest);
8526
8527   CHECK_STRING (string);
       /* Detect over the string's whole contents, with no coding system
          specified in advance.  */
8528   return detect_coding_system (SDATA (string), SCHARS (string),
8529                                SBYTES (string), want_highest,
8530                                STRING_MULTIBYTE (string), Qnil);
8531 }
8532
8533
8534 static inline int
8535 char_encodable_p (int c, Lisp_Object attrs)
8536 {
8537   Lisp_Object tail;
8538   struct charset *charset;
8539   Lisp_Object translation_table;
8540
8541   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8542   if (! NILP (translation_table))
8543     c = translate_char (translation_table, c);
8544   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8545        CONSP (tail); tail = XCDR (tail))
8546     {
8547       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8548       if (CHAR_CHARSET_P (c, charset))
8549         break;
8550     }
8551   return (! NILP (tail));
8552 }
8553
8554
8555 /* Return a list of coding systems that safely encode the text between
8556    START and END.  If EXCLUDE is non-nil, it is a list of coding
8557    systems not to check.  The returned list doesn't contain any such
8558    coding systems.  In any case, if the text contains only ASCII or is
8559    unibyte, return t.

        START may also be a string; then the whole string is examined
        and END is not consulted.

        The returned list always ends with `raw-text' and
        `no-conversion'.  */
8560
8561 DEFUN ("find-coding-systems-region-internal",
8562        Ffind_coding_systems_region_internal,
8563        Sfind_coding_systems_region_internal, 2, 3, 0,
8564        doc: /* Internal use only.  */)
8565   (Lisp_Object start, Lisp_Object end, Lisp_Object exclude)
8566 {
8567   Lisp_Object coding_attrs_list, safe_codings;
8568   EMACS_INT start_byte, end_byte;
8569   const unsigned char *p, *pbeg, *pend;
8570   int c;
8571   Lisp_Object tail, elt, work_table;
8572
8573   if (STRINGP (start))
8574     {
           /* A unibyte string, or a multibyte one whose character and
              byte counts agree, needs no further check.  */
8575       if (!STRING_MULTIBYTE (start)
8576           || SCHARS (start) == SBYTES (start))
8577         return Qt;
8578       start_byte = 0;
8579       end_byte = SBYTES (start);
8580     }
8581   else
8582     {
8583       CHECK_NUMBER_COERCE_MARKER (start);
8584       CHECK_NUMBER_COERCE_MARKER (end);
8585       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8586         args_out_of_range (start, end);
8587       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
8588         return Qt;
8589       start_byte = CHAR_TO_BYTE (XINT (start));
8590       end_byte = CHAR_TO_BYTE (XINT (end));
           /* As many bytes as characters: nothing to check.  */
8591       if (XINT (end) - XINT (start) == end_byte - start_byte)
8592         return Qt;
8593
           /* The text is scanned below through a single pointer range,
              so when the gap lies inside the region move it to
              whichever end of the region is nearer.  */
8594       if (XINT (start) < GPT && XINT (end) > GPT)
8595         {
8596           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8597             move_gap_both (XINT (start), start_byte);
8598           else
8599             move_gap_both (XINT (end), end_byte);
8600         }
8601     }
8602
       /* Collect the attribute vectors of the candidate coding systems:
          those not listed in EXCLUDE whose name is their own base name
          and whose type is not `undecided'.  The result of
          get_translation_table is stored in the coding_attr_trans_tbl
          slot of each candidate (presumably the slot char_encodable_p
          reads back via CODING_ATTR_TRANS_TBL).  */
8603   coding_attrs_list = Qnil;
8604   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8605     if (NILP (exclude)
8606         || NILP (Fmemq (XCAR (tail), exclude)))
8607       {
8608         Lisp_Object attrs;
8609
8610         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8611         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8612             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8613           {
8614             ASET (attrs, coding_attr_trans_tbl,
8615                   get_translation_table (attrs, 1, NULL));
8616             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8617           }
8618       }
8619
8620   if (STRINGP (start))
8621     p = pbeg = SDATA (start);
8622   else
8623     p = pbeg = BYTE_POS_ADDR (start_byte);
8624   pend = p + (end_byte - start_byte);
8625
       /* Trim the runs of ASCII bytes at both ends of the text.  */
8626   while (p < pend && ASCII_BYTE_P (*p)) p++;
8627   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8628
       /* WORK_TABLE records the characters already examined, so that
          each distinct character is tested only once.  */
8629   work_table = Fmake_char_table (Qnil, Qnil);
8630   while (p < pend)
8631     {
8632       if (ASCII_BYTE_P (*p))
8633         p++;
8634       else
8635         {
8636           c = STRING_CHAR_ADVANCE (p);
8637           if (!NILP (char_table_ref (work_table, c)))
8638             /* This character was already checked.  Ignore it.  */
8639             continue;
8640
8641           charset_map_loaded = 0;
               /* Drop from the list every candidate that cannot encode
                  C.  A cell is removed in place by copying its successor
                  into it (TAIL is then not advanced, so the copied
                  element is tested next); the last cell has no
                  successor, so its car is set to nil instead.  */
8642           for (tail = coding_attrs_list; CONSP (tail);)
8643             {
8644               elt = XCAR (tail);
8645               if (NILP (elt))
8646                 tail = XCDR (tail);
8647               else if (char_encodable_p (c, elt))
8648                 tail = XCDR (tail);
8649               else if (CONSP (XCDR (tail)))
8650                 {
8651                   XSETCAR (tail, XCAR (XCDR (tail)));
8652                   XSETCDR (tail, XCDR (XCDR (tail)));
8653                 }
8654               else
8655                 {
8656                   XSETCAR (tail, Qnil);
8657                   tail = XCDR (tail);
8658                 }
8659             }
               /* If charset_map_loaded became nonzero during the checks,
                  recompute the scan pointers from their offsets
                  (presumably because the text may have been relocated
                  -- TODO confirm).  */
8660           if (charset_map_loaded)
8661             {
8662               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8663
8664               if (STRINGP (start))
8665                 pbeg = SDATA (start);
8666               else
8667                 pbeg = BYTE_POS_ADDR (start_byte);
8668               p = pbeg + p_offset;
8669               pend = pbeg + pend_offset;
8670             }
8671           char_table_set (work_table, c, Qt);
8672         }
8673     }
8674
       /* The surviving candidates are consed in front of `raw-text' and
          `no-conversion', which are always part of the result.  */
8675   safe_codings = list2 (Qraw_text, Qno_conversion);
8676   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8677     if (! NILP (XCAR (tail)))
8678       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8679
8680   return safe_codings;
8681 }
8682
8683
8684 DEFUN ("unencodable-char-position", Funencodable_char_position,
8685        Sunencodable_char_position, 3, 5, 0,
8686        doc: /*
8687 Return position of first un-encodable character in a region.
8688 START and END specify the region and CODING-SYSTEM specifies the
8689 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8690
8691 If optional 4th argument COUNT is non-nil, it specifies at most how
8692 many un-encodable characters to search.  In this case, the value is a
8693 list of positions.
8694
8695 If optional 5th argument STRING is non-nil, it is a string to search
8696 for un-encodable characters.  In that case, START and END are indexes
8697 to the string.  */)
8698   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
8699 {
       /* N is the number of hits still wanted.  FROM and TO are character
          positions.  All three are filled from XINT, so they are kept as
          EMACS_INT rather than being narrowed to int.  */
8700   EMACS_INT n;
8701   struct coding_system coding;
8702   Lisp_Object attrs, charset_list, translation_table;
8703   Lisp_Object positions;
8704   EMACS_INT from, to;
       /* P is the scan pointer, PEND the end of the text to scan, and
          STOP the end of the current contiguous run (the gap or PEND).  */
8705   const unsigned char *p, *stop, *pend;
8706   int ascii_compatible;
8707
8708   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8709   attrs = CODING_ID_ATTRS (coding.id);
       /* A coding system of type raw-text never reports a position.  */
8710   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8711     return Qnil;
8712   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8713   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8714   translation_table = get_translation_table (attrs, 1, NULL);
8715
8716   if (NILP (string))
8717     {
8718       validate_region (&start, &end);
8719       from = XINT (start);
8720       to = XINT (end);
           /* Nothing to report for a unibyte buffer, or for a region whose
              character count equals its byte count when the coding system
              is ASCII compatible.  */
8721       if (NILP (BVAR (current_buffer, enable_multibyte_characters))
8722           || (ascii_compatible
8723               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8724         return Qnil;
8725       p = CHAR_POS_ADDR (from);
8726       pend = CHAR_POS_ADDR (to);
           /* When the region straddles the gap, scan up to the gap first.  */
8727       if (from < GPT && to >= GPT)
8728         stop = GPT_ADDR;
8729       else
8730         stop = pend;
8731     }
8732   else
8733     {
8734       CHECK_STRING (string);
8735       CHECK_NATNUM (start);
8736       CHECK_NATNUM (end);
8737       from = XINT (start);
8738       to = XINT (end);
8739       if (from > to
8740           || to > SCHARS (string))
8741         args_out_of_range_3 (string, start, end);
8742       if (! STRING_MULTIBYTE (string))
8743         return Qnil;
8744       p = SDATA (string) + string_char_to_byte (string, from);
8745       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8746       if (ascii_compatible && (to - from) == (pend - p))
8747         return Qnil;
8748     }
8749
       /* Without COUNT only the first hit is wanted.  */
8750   if (NILP (count))
8751     n = 1;
8752   else
8753     {
8754       CHECK_NATNUM (count);
8755       n = XINT (count);
8756     }
8757
8758   positions = Qnil;
8759   charset_map_loaded = 0;
8760   while (1)
8761     {
8762       int c;
8763
           /* Skip ASCII bytes quickly when the coding system is ASCII
              compatible, keeping FROM in step with P.  */
8764       if (ascii_compatible)
8765         while (p < stop && ASCII_BYTE_P (*p))
8766           p++, from++;
8767       if (p >= stop)
8768         {
8769           if (p >= pend)
8770             break;
               /* STOP was the gap; resume scanning after it.  */
8771           stop = pend;
8772           p = GAP_END_ADDR;
8773         }
8774
8775       c = STRING_CHAR_ADVANCE (p);
           /* C is a hit when, after translation, no charset in
              CHARSET_LIST contains it.  */
8776       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8777           && ! char_charset (translate_char (translation_table, c),
8778                              charset_list, NULL))
8779         {
8780           positions = Fcons (make_number (from), positions);
8781           n--;
8782           if (n == 0)
8783             break;
8784         }
8785
8786       from++;
           /* A charset map was loaded during the check above: recompute
              the buffer pointers from the character positions.
              NOTE(review): presumably the load can relocate buffer text;
              confirm.  */
8787       if (charset_map_loaded && NILP (string))
8788         {
8789           p = CHAR_POS_ADDR (from);
8790           pend = CHAR_POS_ADDR (to);
8791           if (from < GPT && to >= GPT)
8792             stop = GPT_ADDR;
8793           else
8794             stop = pend;
8795           charset_map_loaded = 0;
8796         }
8797     }
8798
       /* POSITIONS was built newest-first.  */
8799   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8800 }
8801
8802
8803 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8804        Scheck_coding_systems_region, 3, 3, 0,
8805        doc: /* Check if the region is encodable by coding systems.
8806
8807 START and END are buffer positions specifying the region.
8808 CODING-SYSTEM-LIST is a list of coding systems to check.
8809
8810 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8811 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8812 whole region, POS0, POS1, ... are buffer positions where non-encodable
8813 characters are found.
8814
8815 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8816 value is nil.
8817
8818 START may be a string.  In that case, check if the string is
8819 encodable, and the value contains indices to the string instead of
8820 buffer positions.  END is ignored.
8821
8822 If the current buffer (or START if it is a string) is unibyte, the value
8823 is nil.  */)
8824   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system_list)
8825 {
8826   Lisp_Object list;
8827   EMACS_INT start_byte, end_byte;
       /* POS is the character position (or string index) of the character
          being examined.  It is filled from XINT, so it is an EMACS_INT
          rather than an int.  */
8828   EMACS_INT pos;
8829   const unsigned char *p, *pbeg, *pend;
8830   int c;
8831   Lisp_Object tail, elt, attrs, spec;
8832
8833   if (STRINGP (start))
8834     {
           /* A unibyte string, or one whose character count equals its
              byte count, has nothing to report.  */
8835       if (!STRING_MULTIBYTE (start)
8836           || SCHARS (start) == SBYTES (start))
8837         return Qnil;
8838       start_byte = 0;
8839       end_byte = SBYTES (start);
8840       pos = 0;
8841     }
8842   else
8843     {
8844       CHECK_NUMBER_COERCE_MARKER (start);
8845       CHECK_NUMBER_COERCE_MARKER (end);
8846       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8847         args_out_of_range (start, end);
8848       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
8849         return Qnil;
8850       start_byte = CHAR_TO_BYTE (XINT (start));
8851       end_byte = CHAR_TO_BYTE (XINT (end));
8852       if (XINT (end) - XINT (start) == end_byte - start_byte)
8853         return Qnil;
8854
           /* The region straddles the gap: move the gap to the nearer end
              of the region so that the bytes scanned below are
              contiguous.  */
8855       if (XINT (start) < GPT && XINT (end) > GPT)
8856         {
8857           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8858             move_gap_both (XINT (start), start_byte);
8859           else
8860             move_gap_both (XINT (end), end_byte);
8861         }
8862       pos = XINT (start);
8863     }
8864
       /* Build LIST as ((CODING-SYSTEM ATTRS) ...); unencodable positions
          are later pushed after ATTRS.  */
8865   list = Qnil;
8866   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8867     {
8868       elt = XCAR (tail);
           /* Validate ELT before indexing its spec, as the other
              primitives in this file do before AREF (spec, 0), so that a
              list element that is not a coding system is not indexed.  */
           CHECK_CODING_SYSTEM_GET_SPEC (elt, spec);
8869       attrs = AREF (spec, 0);
           /* Store the translation table obtained for ATTRS in its
              coding_attr_trans_tbl slot.  */
8870       ASET (attrs, coding_attr_trans_tbl,
8871             get_translation_table (attrs, 1, NULL));
8872       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8873     }
8874
8875   if (STRINGP (start))
8876     p = pbeg = SDATA (start);
8877   else
8878     p = pbeg = BYTE_POS_ADDR (start_byte);
8879   pend = p + (end_byte - start_byte);
8880
       /* Skip leading ASCII bytes (advancing POS) and drop trailing ones.  */
8881   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8882   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8883
8884   while (p < pend)
8885     {
8886       if (ASCII_BYTE_P (*p))
8887         p++;
8888       else
8889         {
8890           c = STRING_CHAR_ADVANCE (p);
8891
8892           charset_map_loaded = 0;
               /* Record POS for every coding system that cannot encode C.  */
8893           for (tail = list; CONSP (tail); tail = XCDR (tail))
8894             {
8895               elt = XCDR (XCAR (tail));
8896               if (! char_encodable_p (c, XCAR (elt)))
8897                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8898             }
               /* A charset map was loaded: recompute the pointers from
                  their offsets.  NOTE(review): presumably the load can
                  relocate the text; confirm.  */
8899           if (charset_map_loaded)
8900             {
8901               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8902
8903               if (STRINGP (start))
8904                 pbeg = SDATA (start);
8905               else
8906                 pbeg = BYTE_POS_ADDR (start_byte);
8907               p = pbeg + p_offset;
8908               pend = pbeg + pend_offset;
8909             }
8910         }
8911       pos++;
8912     }
8913
       /* Keep only the coding systems that collected positions, and put
          each position list back into ascending order.  */
8914   tail = list;
8915   list = Qnil;
8916   for (; CONSP (tail); tail = XCDR (tail))
8917     {
8918       elt = XCAR (tail);
8919       if (CONSP (XCDR (XCDR (elt))))
8920         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8921                       list);
8922     }
8923
8924   return list;
8925 }
8926
8927
8928 static Lisp_Object
8929 code_convert_region (Lisp_Object start, Lisp_Object end,
8930                      Lisp_Object coding_system, Lisp_Object dst_object,
8931                      int encodep, int norecord)
8932 {
       /* Encode (ENCODEP nonzero) or decode (ENCODEP zero) the text between
          START and END of the current buffer with CODING_SYSTEM.
          A nil CODING_SYSTEM means `no-conversion'.  DST_OBJECT is nil
          (the current buffer is the destination), t, or a buffer.
          Unless NORECORD is nonzero, Vlast_coding_system_used is set to the
          name of the coding system actually set up.
          Return the number of produced characters when the destination is
          a buffer, otherwise the destination object held in CODING.  */
8933   struct coding_system coding;
8934   EMACS_INT from, from_byte, to, to_byte;
8935   Lisp_Object src_object;
8936
8937   CHECK_NUMBER_COERCE_MARKER (start);
8938   CHECK_NUMBER_COERCE_MARKER (end);
8939   if (NILP (coding_system))
8940     coding_system = Qno_conversion;
8941   else
8942     CHECK_CODING_SYSTEM (coding_system);
8943   src_object = Fcurrent_buffer ();
8944   if (NILP (dst_object))
8945     dst_object = src_object;
8946   else if (! EQ (dst_object, Qt))
8947     CHECK_BUFFER (dst_object);
8948
       /* Convert the validated character positions to byte positions.  */
8949   validate_region (&start, &end);
8950   from = XFASTINT (start);
8951   from_byte = CHAR_TO_BYTE (from);
8952   to = XFASTINT (end);
8953   to_byte = CHAR_TO_BYTE (to);
8954
8955   setup_coding_system (coding_system, &coding);
       /* The whole region is converted in one call, so this block is also
          the last one.  */
8956   coding.mode |= CODING_MODE_LAST_BLOCK;
8957
8958   if (encodep)
8959     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8960                           dst_object);
8961   else
8962     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8963                           dst_object);
8964   if (! norecord)
8965     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8966
8967   return (BUFFERP (dst_object)
8968           ? make_number (coding.produced_char)
8969           : coding.dst_object);
8970 }
8971
8972
8973 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8974        3, 4, "r\nzCoding system: ",
8975        doc: /* Decode the current region from the specified coding system.
8976 When called from a program, takes four arguments:
8977         START, END, CODING-SYSTEM, and DESTINATION.
8978 START and END are buffer positions.
8979
8980 Optional 4th argument DESTINATION specifies where the decoded text goes.
8981 If nil, the region between START and END is replaced by the decoded text.
8982 If buffer, the decoded text is inserted in that buffer after point (point
8983 does not move).
8984 In those cases, the length of the decoded text is returned.
8985 If DESTINATION is t, the decoded text is returned.
8986
8987 This function sets `last-coding-system-used' to the precise coding system
8988 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8989 not fully specified.)  */)
8990   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
8991 {
       /* Trailing arguments: ENCODEP = 0 (decode) and NORECORD = 0, so
          Vlast_coding_system_used is updated.  */
8992   return code_convert_region (start, end, coding_system, destination, 0, 0);
8993 }
8994
8995 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8996        3, 4, "r\nzCoding system: ",
8997        doc: /* Encode the current region by specified coding system.
8998 When called from a program, takes four arguments:
8999         START, END, CODING-SYSTEM and DESTINATION.
9000 START and END are buffer positions.
9001
9002 Optional 4th argument DESTINATION specifies where the encoded text goes.
9003 If nil, the region between START and END is replaced by the encoded text.
9004 If buffer, the encoded text is inserted in that buffer after point (point
9005 does not move).
9006 In those cases, the length of the encoded text is returned.
9007 If DESTINATION is t, the encoded text is returned.
9008
9009 This function sets `last-coding-system-used' to the precise coding system
9010 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9011 not fully specified.)  */)
9012   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
9013 {
       /* Trailing arguments: ENCODEP = 1 (encode) and NORECORD = 0, so
          Vlast_coding_system_used is updated.  */
9014   return code_convert_region (start, end, coding_system, destination, 1, 0);
9015 }
9016
9017 Lisp_Object
9018 code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9019                      Lisp_Object dst_object, int encodep, int nocopy, int norecord)
9020 {
       /* Encode (ENCODEP nonzero) or decode (ENCODEP zero) STRING with
          CODING_SYSTEM.  DST_OBJECT is nil or t (the converted text is
          returned) or a buffer.  NOCOPY nonzero means STRING itself may be
          returned when no conversion is needed.  Unless NORECORD is
          nonzero, Vlast_coding_system_used is updated.
          Return the number of produced characters when the destination is
          a buffer, otherwise the destination object held in CODING.  */
9021   struct coding_system coding;
9022   EMACS_INT chars, bytes;
9023
9024   CHECK_STRING (string);
9025   if (NILP (coding_system))
9026     {
9027       if (! norecord)
9028         Vlast_coding_system_used = Qno_conversion;
           /* Trivial case: no coding system and no explicit destination.
              Hand back STRING itself only when the caller allowed it with
              NOCOPY; otherwise return a fresh copy.  */
9029       if (NILP (dst_object))
9030         return (nocopy ? string : Fcopy_sequence (string));
9031     }
9032
9033   if (NILP (coding_system))
9034     coding_system = Qno_conversion;
9035   else
9036     CHECK_CODING_SYSTEM (coding_system);
9037   if (NILP (dst_object))
9038     dst_object = Qt;
9039   else if (! EQ (dst_object, Qt))
9040     CHECK_BUFFER (dst_object);
9041
9042   setup_coding_system (coding_system, &coding);
       /* The whole string is converted in one call, so this block is also
          the last one.  */
9043   coding.mode |= CODING_MODE_LAST_BLOCK;
9044   chars = SCHARS (string);
9045   bytes = SBYTES (string);
9046   if (encodep)
9047     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9048   else
9049     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9050   if (! norecord)
9051     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9052
9053   return (BUFFERP (dst_object)
9054           ? make_number (coding.produced_char)
9055           : coding.dst_object);
9056 }
9057
9058
9059 /* Encode or decode STRING according to CODING_SYSTEM.
9060    Do not set Vlast_coding_system_used.
        ENCODEP nonzero means encode, zero means decode.  The converted
        text is returned: the destination passed down is Qt, not a buffer.
9061
9062    This function is called only from macros DECODE_FILE and
9063    ENCODE_FILE, thus we ignore character composition.  */
9064
9065 Lisp_Object
9066 code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9067                               int encodep)
9068 {
       /* Trailing arguments: NOCOPY = 0 and NORECORD = 1.  */
9069   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9070 }
9071
9072
9073 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9074        2, 4, 0,
9075        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9076
9077 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9078 if the decoding operation is trivial.
9079
9080 Optional fourth arg BUFFER non-nil means that the decoded text is
9081 inserted in that buffer after point (point does not move).  In this
9082 case, the return value is the length of the decoded text.
9083
9084 This function sets `last-coding-system-used' to the precise coding system
9085 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9086 not fully specified.)  */)
9087   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9088 {
       /* BUFFER is passed as the destination object.  The trailing
          arguments are ENCODEP = 0 (decode), NOCOPY as a C boolean, and
          NORECORD = 0.  */
9089   return code_convert_string (string, coding_system, buffer,
9090                               0, ! NILP (nocopy), 0);
9091 }
9092
9093 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9094        2, 4, 0,
9095        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9096
9097 Optional third arg NOCOPY non-nil means it is OK to return STRING
9098 itself if the encoding operation is trivial.
9099
9100 Optional fourth arg BUFFER non-nil means that the encoded text is
9101 inserted in that buffer after point (point does not move).  In this
9102 case, the return value is the length of the encoded text.
9103
9104 This function sets `last-coding-system-used' to the precise coding system
9105 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9106 not fully specified.)  */)
9107   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9108 {
       /* BUFFER is passed as the destination object.  The trailing
          arguments are ENCODEP = 1 (encode), NOCOPY as a C boolean, and
          NORECORD = 0.  */
9109   return code_convert_string (string, coding_system, buffer,
9110                               1, ! NILP (nocopy), 0);
9111 }
9112
9113 \f
9114 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9115        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9116 Return the corresponding character.  */)
9117   (Lisp_Object code)
9118 {
9119   Lisp_Object spec, attrs, val;
9120   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9121   EMACS_INT ch;
9122   int c;
9123
9124   CHECK_NATNUM (code);
9125   ch = XFASTINT (code);
9126   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9127   attrs = AREF (spec, 0);
9128
       /* An ASCII code decodes to itself when the coding system is ASCII
          compatible.  */
9129   if (ASCII_BYTE_P (ch)
9130       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9131     return code;
9132
       /* The charset list is read as roman, kana, kanji, in that order.
          NOTE(review): assumes the list has at least three elements.  */
9133   val = CODING_ATTR_CHARSET_LIST (attrs);
9134   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9135   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9136   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9137
9138   if (ch <= 0x7F)
9139     {
9140       c = ch;
9141       charset = charset_roman;
9142     }
       /* Single-byte katakana.  The upper bound is inclusive: 0xDF is the
          last single-byte katakana code in Shift_JIS.  */
9143   else if (ch >= 0xA0 && ch <= 0xDF)
9144     {
9145       c = ch - 0x80;
9146       charset = charset_kana;
9147     }
9148   else
9149     {
           /* Two-byte code: C1 is the lead byte, C2 the trail byte.  */
9150       EMACS_INT c1 = ch >> 8;
9151       int c2 = ch & 0xFF;
9152
9153       if (c1 < 0x81 || (c1 > 0x9F && c1 < 0xE0) || c1 > 0xEF
9154           || c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
9155         error ("Invalid code: %"pI"d", ch);
9156       c = ch;
9157       SJIS_TO_JIS (c);
9158       charset = charset_kanji;
9159     }
9160   c = DECODE_CHAR (charset, c);
       /* A negative result means CHARSET has no character for the code.  */
9161   if (c < 0)
9162     error ("Invalid code: %"pI"d", ch);
9163   return make_number (c);
9164 }
9165
9166
9167 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9168        doc: /* Encode a Japanese character CH to shift_jis encoding.
9169 Return the corresponding code in SJIS.  */)
9170   (Lisp_Object ch)
9171 {
9172   Lisp_Object spec, attrs, charset_list;
9173   int c;
9174   struct charset *charset;
9175   unsigned code;
9176
9177   CHECK_CHARACTER (ch);
9178   c = XFASTINT (ch);
9179   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9180   attrs = AREF (spec, 0);
9181
       /* An ASCII character encodes to itself when the coding system is
          ASCII compatible.  */
9182   if (ASCII_CHAR_P (c)
9183       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9184     return ch;
9185
9186   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9187   charset = char_charset (c, charset_list, &code);
       /* char_charset yields a null pointer when no charset in the list
          contains C.  Test that first so that neither the null charset
          nor the then-unset CODE is used.  */
9188   if (! charset || code == CHARSET_INVALID_CODE (charset))
9189     error ("Can't encode by shift_jis encoding: %c", c);
9190   JIS_TO_SJIS (code);
9191
9192   return make_number (code);
9193 }
9194
9195 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9196        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9197 Return the corresponding character.  */)
9198   (Lisp_Object code)
9199 {
9200   Lisp_Object spec, attrs, val;
9201   struct charset *charset_roman, *charset_big5, *charset;
9202   EMACS_INT ch;
9203   int c;
9204
9205   CHECK_NATNUM (code);
9206   ch = XFASTINT (code);
9207   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9208   attrs = AREF (spec, 0);
9209
       /* An ASCII code decodes to itself when the coding system is ASCII
          compatible.  */
9210   if (ASCII_BYTE_P (ch)
9211       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9212     return code;
9213
       /* The charset list is read as roman, big5, in that order.
          NOTE(review): assumes the list has at least two elements.  */
9214   val = CODING_ATTR_CHARSET_LIST (attrs);
9215   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9216   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9217
9218   if (ch <= 0x7F)
9219     {
9220       c = ch;
9221       charset = charset_roman;
9222     }
9223   else
9224     {
           /* Two-byte code: B1 is the lead byte, B2 the trail byte.  B2
              must keep all eight bits, because the range test below
              accepts trail bytes 0xA1..0xFE as well as 0x40..0x7E.  */
9225       EMACS_INT b1 = ch >> 8;
9226       int b2 = ch & 0xFF;
9227       if (b1 < 0xA1 || b1 > 0xFE
9228           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9229         error ("Invalid code: %"pI"d", ch);
9230       c = ch;
9231       charset = charset_big5;
9232     }
9233   c = DECODE_CHAR (charset, c);
       /* A negative result means CHARSET has no character for the code.  */
9234   if (c < 0)
9235     error ("Invalid code: %"pI"d", ch);
9236   return make_number (c);
9237 }
9238
9239 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9240        doc: /* Encode the Big5 character CH to BIG5 coding system.
9241 Return the corresponding character code in Big5.  */)
9242   (Lisp_Object ch)
9243 {
9244   Lisp_Object spec, attrs, charset_list;
9245   struct charset *charset;
9246   int c;
9247   unsigned code;
9248
9249   CHECK_CHARACTER (ch);
9250   c = XFASTINT (ch);
9251   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9252   attrs = AREF (spec, 0);
       /* An ASCII character encodes to itself when the coding system is
          ASCII compatible.  */
9253   if (ASCII_CHAR_P (c)
9254       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9255     return ch;
9256
9257   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9258   charset = char_charset (c, charset_list, &code);
       /* char_charset yields a null pointer when no charset in the list
          contains C.  Test that first so that neither the null charset
          nor the then-unset CODE is used.  */
9259   if (! charset || code == CHARSET_INVALID_CODE (charset))
9260     error ("Can't encode by Big5 encoding: %c", c);
9261
9262   return make_number (code);
9263 }
9264
9265 \f
9266 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9267        Sset_terminal_coding_system_internal, 1, 2, 0,
9268        doc: /* Internal use only.  */)
9269   (Lisp_Object coding_system, Lisp_Object terminal)
9270 {
       /* Set up the terminal coding of TERMINAL (resolved by get_terminal)
          from CODING_SYSTEM, then adjust it for terminal output.  */
9271   struct terminal *term = get_terminal (terminal, 1);
9272   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (term);
9273   CHECK_SYMBOL (coding_system);
9274   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9275   /* We had better not send unsafe characters to terminal.  */
9276   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9277   /* Character composition should be disabled.  */
9278   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9279   terminal_coding->src_multibyte = 1;
9280   terminal_coding->dst_multibyte = 0;
       /* Record on the terminal the charsets of the coding system when it
          requires encoding; otherwise a list holding only ASCII.  */
9281   if (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK)
9282     term->charset_list = coding_charset_list (terminal_coding);
9283   else
9284     term->charset_list = Fcons (make_number (charset_ascii), Qnil);
9285   return Qnil;
9286 }
9287
9288 DEFUN ("set-safe-terminal-coding-system-internal",
9289        Fset_safe_terminal_coding_system_internal,
9290        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9291        doc: /* Internal use only.  */)
9292   (Lisp_Object coding_system)
9293 {
       /* Set up the file-level safe_terminal_coding from CODING_SYSTEM,
          with multibyte source and unibyte destination.  */
9294   CHECK_SYMBOL (coding_system);
9295   setup_coding_system (Fcheck_coding_system (coding_system),
9296                        &safe_terminal_coding);
9297   /* Character composition should be disabled.  */
9298   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9299   safe_terminal_coding.src_multibyte = 1;
9300   safe_terminal_coding.dst_multibyte = 0;
9301   return Qnil;
9302 }
9303
9304 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9305        Sterminal_coding_system, 0, 1, 0,
9306        doc: /* Return coding system specified for terminal output on the given terminal.
9307 TERMINAL may be a terminal object, a frame, or nil for the selected
9308 frame's terminal device.  */)
9309   (Lisp_Object terminal)
9310 {
       /* Look up the terminal first, then the name of its output coding
          system.  */
       struct terminal *term = get_terminal (terminal, 1);
       Lisp_Object name = CODING_ID_NAME (TERMINAL_TERMINAL_CODING (term)->id);

       /* `undecided' is reported as nil, for backward compatibility.  */
       if (EQ (name, Qundecided))
         return Qnil;
       return name;
9317 }
9318
9319 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9320        Sset_keyboard_coding_system_internal, 1, 2, 0,
9321        doc: /* Internal use only.  */)
9322   (Lisp_Object coding_system, Lisp_Object terminal)
9323 {
       /* Set up the keyboard coding of TERMINAL (resolved by get_terminal)
          from CODING_SYSTEM; nil stands for `no-conversion'.  */
9324   struct terminal *t = get_terminal (terminal, 1);
9325   CHECK_SYMBOL (coding_system);
9326   if (NILP (coding_system))
9327     coding_system = Qno_conversion;
9328   else
9329     Fcheck_coding_system (coding_system);
9330   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9331   /* Character composition should be disabled.  */
9332   TERMINAL_KEYBOARD_CODING (t)->common_flags
9333     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9334   return Qnil;
9335 }
9336
9337 DEFUN ("keyboard-coding-system",
9338        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9339        doc: /* Return coding system specified for decoding keyboard input.  */)
9340   (Lisp_Object terminal)
9341 {
       /* Resolve TERMINAL, then report the name of its keyboard coding
          system.  */
       struct terminal *t = get_terminal (terminal, 1);
       struct coding_system *keyboard_coding = TERMINAL_KEYBOARD_CODING (t);

       return CODING_ID_NAME (keyboard_coding->id);
9344 }
9345
9346 \f
9347 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9348        Sfind_operation_coding_system,  1, MANY, 0,
9349        doc: /* Choose a coding system for an operation based on the target name.
9350 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9351 DECODING-SYSTEM is the coding system to use for decoding
9352 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9353 for encoding (in case OPERATION does encoding).
9354
9355 The first argument OPERATION specifies an I/O primitive:
9356   For file I/O, `insert-file-contents' or `write-region'.
9357   For process I/O, `call-process', `call-process-region', or `start-process'.
9358   For network I/O, `open-network-stream'.
9359
9360 The remaining arguments should be the same arguments that were passed
9361 to the primitive.  Depending on which primitive, one of those arguments
9362 is selected as the TARGET.  For example, if OPERATION does file I/O,
9363 whichever argument specifies the file name is TARGET.
9364
9365 TARGET has a meaning which depends on OPERATION:
9366   For file I/O, TARGET is a file name (except for the special case below).
9367   For process I/O, TARGET is a process name.
9368   For network I/O, TARGET is a service name or a port number.
9369
9370 This function looks up what is specified for TARGET in
9371 `file-coding-system-alist', `process-coding-system-alist',
9372 or `network-coding-system-alist' depending on OPERATION.
9373 They may specify a coding system, a cons of coding systems,
9374 or a function symbol to call.
9375 In the last case, we call the function with one argument,
9376 which is a list of all the arguments given to this function.
9377 If the function can't decide a coding system, it can return
9378 `undecided' so that the normal code-detection is performed.
9379
9380 If OPERATION is `insert-file-contents', the argument corresponding to
9381 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9382 file name to look up, and BUFFER is a buffer that contains the file's
9383 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9384 function to call for FILENAME, that function should examine the
9385 contents of BUFFER instead of reading the file.
9386
9387 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9388   (ptrdiff_t nargs, Lisp_Object *args)
9389 {
9390   Lisp_Object operation, target_idx, target, val;
9391   register Lisp_Object chain;
9392
9393   if (nargs < 2)
9394     error ("Too few arguments");
9395   operation = args[0];
       /* The `target-idx' property of OPERATION says which of the
          remaining arguments is the target.  */
9396   if (!SYMBOLP (operation)
9397       || !NATNUMP (target_idx = Fget (operation, Qtarget_idx)))
9398     error ("Invalid first argument");
       /* The target is read from args[target_idx + 1], so at least
          target_idx + 2 arguments are required; with exactly
          target_idx + 1 the read below would be one past the end.  */
9399   if (nargs <= 1 + XFASTINT (target_idx))
9400     error ("Too few arguments for operation `%s'",
9401            SDATA (SYMBOL_NAME (operation)));
9402   target = args[XFASTINT (target_idx) + 1];
       /* Accept a string, a (FILENAME . BUFFER) cons for
          `insert-file-contents', or an integer for `open-network-stream'.  */
9403   if (!(STRINGP (target)
9404         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9405             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9406         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9407     error ("Invalid argument %"pI"d of operation `%s'",
9408            XFASTINT (target_idx) + 1, SDATA (SYMBOL_NAME (operation)));
9409   if (CONSP (target))
9410     target = XCAR (target);
9411
       /* Choose the alist to search according to OPERATION.  */
9412   chain = ((EQ (operation, Qinsert_file_contents)
9413             || EQ (operation, Qwrite_region))
9414            ? Vfile_coding_system_alist
9415            : (EQ (operation, Qopen_network_stream)
9416               ? Vnetwork_coding_system_alist
9417               : Vprocess_coding_system_alist));
9418   if (NILP (chain))
9419     return Qnil;
9420
       /* The first alist entry whose key matches TARGET decides the
          result: a string key is matched as a regexp against a string
          target, an integer target is compared with EQ.  */
9421   for (; CONSP (chain); chain = XCDR (chain))
9422     {
9423       Lisp_Object elt;
9424
9425       elt = XCAR (chain);
9426       if (CONSP (elt)
9427           && ((STRINGP (target)
9428                && STRINGP (XCAR (elt))
9429                && fast_string_match (XCAR (elt), target) >= 0)
9430               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9431         {
9432           val = XCDR (elt);
9433           /* Here, if VAL is both a valid coding system and a valid
9434              function symbol, we return VAL as a coding system.  */
9435           if (CONSP (val))
9436             return val;
9437           if (! SYMBOLP (val))
9438             return Qnil;
9439           if (! NILP (Fcoding_system_p (val)))
9440             return Fcons (val, val);
9441           if (! NILP (Ffboundp (val)))
9442             {
9443               /* We use call1 rather than safe_call1
9444                  so as to get bug reports about functions called here
9445                  which don't handle the current interface.  */
9446               val = call1 (val, Flist (nargs, args));
9447               if (CONSP (val))
9448                 return val;
9449               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9450                 return Fcons (val, val);
9451             }
9452           return Qnil;
9453         }
9454     }
9455   return Qnil;
9456 }
9457
9458 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9459        Sset_coding_system_priority, 0, MANY, 0,
9460        doc: /* Assign higher priority to the coding systems given as arguments.
9461 If multiple coding systems belong to the same category,
9462 all but the first one are ignored.
9463
9464 usage: (set-coding-system-priority &rest coding-systems)  */)
9465   (ptrdiff_t nargs, Lisp_Object *args)
9466 {
9467   ptrdiff_t i, j;
       /* CHANGED[cat] is nonzero once category CAT has been given a
          priority by an argument.  PRIORITIES is the new ordering being
          built.  */
9468   int changed[coding_category_max];
9469   enum coding_category priorities[coding_category_max];
9470
9471   memset (changed, 0, sizeof changed);
9472
       /* First pass: the categories of the arguments, in argument order,
          take the top J slots.  */
9473   for (i = j = 0; i < nargs; i++)
9474     {
9475       enum coding_category category;
9476       Lisp_Object spec, attrs;
9477
9478       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9479       attrs = AREF (spec, 0);
9480       category = XINT (CODING_ATTR_CATEGORY (attrs));
9481       if (changed[category])
9482         /* Ignore this coding system because a coding system of the
9483            same category already had a higher priority.  */
9484         continue;
9485       changed[category] = 1;
9486       priorities[j++] = category;
           /* Re-setup the category's coding system when one is already set
              up (id >= 0) under a different name, and store ARGS[i] as the
              value of the category's entry in Vcoding_category_table.  */
9487       if (coding_categories[category].id >= 0
9488           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9489         setup_coding_system (args[i], &coding_categories[category]);
9490       Fset (AREF (Vcoding_category_table, category), args[i]);
9491     }
9492
9493   /* Now we have decided top J priorities.  Reflect the order of the
9494      original priorities to the remaining priorities.  */
9495
       /* I walks the free slots of PRIORITIES.  J walks the old
          coding_priorities, skipping categories already placed above.  */
9496   for (i = j, j = 0; i < coding_category_max; i++, j++)
9497     {
9498       while (j < coding_category_max
9499              && changed[coding_priorities[j]])
9500         j++;
           /* NOTE(review): reaching the end here presumably means
              coding_priorities did not hold every category exactly once;
              confirm.  */
9501       if (j == coding_category_max)
9502         abort ();
9503       priorities[i] = coding_priorities[j];
9504     }
9505
9506   memcpy (coding_priorities, priorities, sizeof priorities);
9507
9508   /* Update `coding-category-list'.  */
9509   Vcoding_category_list = Qnil;
       /* Cons from the lowest priority upward so that the list ends up in
          priority order.  */
9510   for (i = coding_category_max; i-- > 0; )
9511     Vcoding_category_list
9512       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9513                Vcoding_category_list);
9514
9515   return Qnil;
9516 }
9517
9518 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9519        Scoding_system_priority_list, 0, 1, 0,
9520        doc: /* Return a list of coding systems ordered by their priorities.
9521 The list contains a subset of coding systems; i.e. coding systems
9522 assigned to each coding category (see `coding-category-list').
9523
9524 HIGHESTP non-nil means just return the highest priority one.  */)
9525   (Lisp_Object highestp)
9526 {
       Lisp_Object result = Qnil;
       int first_only = ! NILP (highestp);
       int rank = 0;

       /* Visit the categories from highest to lowest priority, skipping
          those that have no coding system set up (negative id).  */
       while (rank < coding_category_max)
         {
           int id = coding_categories[coding_priorities[rank++]].id;

           if (id >= 0)
             {
               Lisp_Object base_name
                 = CODING_ATTR_BASE_NAME (CODING_ID_ATTRS (id));

               if (first_only)
                 return base_name;
               result = Fcons (base_name, result);
             }
         }

       /* RESULT was accumulated lowest-priority-first; put it in order.  */
       return Fnreverse (result);
9544 }
9545
9546 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9547
9548 static Lisp_Object
9549 make_subsidiaries (Lisp_Object base)
9550 {
       /* Return a three-element vector of symbols named after BASE with
          each of the suffixes above appended, in table order.  The scratch
          buffer has room for the base name, the longest suffix ("-unix",
          five bytes) and the terminating NUL.  */
       ptrdiff_t prefix_len = SBYTES (SYMBOL_NAME (base));
       char *name = (char *) alloca (prefix_len + 6);
       char *suffix_start = name + prefix_len;
       Lisp_Object result;
       int idx = 0;

       memcpy (name, SDATA (SYMBOL_NAME (base)), prefix_len);
       result = Fmake_vector (make_number (3), Qnil);
       while (idx < 3)
         {
           /* Overwrite the previous suffix in place, then intern.  */
           strcpy (suffix_start, suffixes[idx]);
           ASET (result, idx, intern (name));
           idx++;
         }
       return result;
9564 }
9565
9566
9567 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9568        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9569        doc: /* For internal use only.
9570 usage: (define-coding-system-internal ...)  */)
9571   (ptrdiff_t nargs, Lisp_Object *args)
9572 {
9573   Lisp_Object name;
9574   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9575   Lisp_Object attrs;            /* Vector of attributes.  */
9576   Lisp_Object eol_type;
9577   Lisp_Object aliases;
9578   Lisp_Object coding_type, charset_list, safe_charsets;
9579   enum coding_category category;
9580   Lisp_Object tail, val;
9581   int max_charset_id = 0;
9582   int i;
9583
9584   if (nargs < coding_arg_max)
9585     goto short_args;
9586
9587   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9588
9589   name = args[coding_arg_name];
9590   CHECK_SYMBOL (name);
9591   CODING_ATTR_BASE_NAME (attrs) = name;
9592
9593   val = args[coding_arg_mnemonic];
9594   if (! STRINGP (val))
9595     CHECK_CHARACTER (val);
9596   CODING_ATTR_MNEMONIC (attrs) = val;
9597
9598   coding_type = args[coding_arg_coding_type];
9599   CHECK_SYMBOL (coding_type);
9600   CODING_ATTR_TYPE (attrs) = coding_type;
9601
9602   charset_list = args[coding_arg_charset_list];
9603   if (SYMBOLP (charset_list))
9604     {
9605       if (EQ (charset_list, Qiso_2022))
9606         {
9607           if (! EQ (coding_type, Qiso_2022))
9608             error ("Invalid charset-list");
9609           charset_list = Viso_2022_charset_list;
9610         }
9611       else if (EQ (charset_list, Qemacs_mule))
9612         {
9613           if (! EQ (coding_type, Qemacs_mule))
9614             error ("Invalid charset-list");
9615           charset_list = Vemacs_mule_charset_list;
9616         }
9617       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9618         if (max_charset_id < XFASTINT (XCAR (tail)))
9619           max_charset_id = XFASTINT (XCAR (tail));
9620     }
9621   else
9622     {
9623       charset_list = Fcopy_sequence (charset_list);
9624       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9625         {
9626           struct charset *charset;
9627
9628           val = XCAR (tail);
9629           CHECK_CHARSET_GET_CHARSET (val, charset);
9630           if (EQ (coding_type, Qiso_2022)
9631               ? CHARSET_ISO_FINAL (charset) < 0
9632               : EQ (coding_type, Qemacs_mule)
9633               ? CHARSET_EMACS_MULE_ID (charset) < 0
9634               : 0)
9635             error ("Can't handle charset `%s'",
9636                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9637
9638           XSETCAR (tail, make_number (charset->id));
9639           if (max_charset_id < charset->id)
9640             max_charset_id = charset->id;
9641         }
9642     }
9643   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9644
9645   safe_charsets = make_uninit_string (max_charset_id + 1);
9646   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9647   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9648     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9649   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9650
9651   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9652
9653   val = args[coding_arg_decode_translation_table];
9654   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9655     CHECK_SYMBOL (val);
9656   CODING_ATTR_DECODE_TBL (attrs) = val;
9657
9658   val = args[coding_arg_encode_translation_table];
9659   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9660     CHECK_SYMBOL (val);
9661   CODING_ATTR_ENCODE_TBL (attrs) = val;
9662
9663   val = args[coding_arg_post_read_conversion];
9664   CHECK_SYMBOL (val);
9665   CODING_ATTR_POST_READ (attrs) = val;
9666
9667   val = args[coding_arg_pre_write_conversion];
9668   CHECK_SYMBOL (val);
9669   CODING_ATTR_PRE_WRITE (attrs) = val;
9670
9671   val = args[coding_arg_default_char];
9672   if (NILP (val))
9673     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9674   else
9675     {
9676       CHECK_CHARACTER (val);
9677       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9678     }
9679
9680   val = args[coding_arg_for_unibyte];
9681   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9682
9683   val = args[coding_arg_plist];
9684   CHECK_LIST (val);
9685   CODING_ATTR_PLIST (attrs) = val;
9686
9687   if (EQ (coding_type, Qcharset))
9688     {
9689       /* Generate a lisp vector of 256 elements.  Each element is nil,
9690          integer, or a list of charset IDs.
9691
9692          If Nth element is nil, the byte code N is invalid in this
9693          coding system.
9694
9695          If Nth element is a number NUM, N is the first byte of a
9696          charset whose ID is NUM.
9697
9698          If Nth element is a list of charset IDs, N is the first byte
9699          of one of them.  The list is sorted by dimensions of the
9700          charsets.  A charset of smaller dimension comes first. */
9701       val = Fmake_vector (make_number (256), Qnil);
9702
9703       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9704         {
9705           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9706           int dim = CHARSET_DIMENSION (charset);
9707           int idx = (dim - 1) * 4;
9708
9709           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9710             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9711
9712           for (i = charset->code_space[idx];
9713                i <= charset->code_space[idx + 1]; i++)
9714             {
9715               Lisp_Object tmp, tmp2;
9716               int dim2;
9717
9718               tmp = AREF (val, i);
9719               if (NILP (tmp))
9720                 tmp = XCAR (tail);
9721               else if (NUMBERP (tmp))
9722                 {
9723                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9724                   if (dim < dim2)
9725                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9726                   else
9727                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9728                 }
9729               else
9730                 {
9731                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9732                     {
9733                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9734                       if (dim < dim2)
9735                         break;
9736                     }
9737                   if (NILP (tmp2))
9738                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9739                   else
9740                     {
9741                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9742                       XSETCAR (tmp2, XCAR (tail));
9743                     }
9744                 }
9745               ASET (val, i, tmp);
9746             }
9747         }
9748       ASET (attrs, coding_attr_charset_valids, val);
9749       category = coding_category_charset;
9750     }
9751   else if (EQ (coding_type, Qccl))
9752     {
9753       Lisp_Object valids;
9754
9755       if (nargs < coding_arg_ccl_max)
9756         goto short_args;
9757
9758       val = args[coding_arg_ccl_decoder];
9759       CHECK_CCL_PROGRAM (val);
9760       if (VECTORP (val))
9761         val = Fcopy_sequence (val);
9762       ASET (attrs, coding_attr_ccl_decoder, val);
9763
9764       val = args[coding_arg_ccl_encoder];
9765       CHECK_CCL_PROGRAM (val);
9766       if (VECTORP (val))
9767         val = Fcopy_sequence (val);
9768       ASET (attrs, coding_attr_ccl_encoder, val);
9769
9770       val = args[coding_arg_ccl_valids];
9771       valids = Fmake_string (make_number (256), make_number (0));
9772       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9773         {
9774           int from, to;
9775
9776           val = Fcar (tail);
9777           if (INTEGERP (val))
9778             {
9779               from = to = XINT (val);
9780               if (from < 0 || from > 255)
9781                 args_out_of_range_3 (val, make_number (0), make_number (255));
9782             }
9783           else
9784             {
9785               CHECK_CONS (val);
9786               CHECK_NATNUM_CAR (val);
9787               CHECK_NATNUM_CDR (val);
9788               from = XINT (XCAR (val));
9789               if (from > 255)
9790                 args_out_of_range_3 (XCAR (val),
9791                                      make_number (0), make_number (255));
9792               to = XINT (XCDR (val));
9793               if (to < from || to > 255)
9794                 args_out_of_range_3 (XCDR (val),
9795                                      XCAR (val), make_number (255));
9796             }
9797           for (i = from; i <= to; i++)
9798             SSET (valids, i, 1);
9799         }
9800       ASET (attrs, coding_attr_ccl_valids, valids);
9801
9802       category = coding_category_ccl;
9803     }
9804   else if (EQ (coding_type, Qutf_16))
9805     {
9806       Lisp_Object bom, endian;
9807
9808       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9809
9810       if (nargs < coding_arg_utf16_max)
9811         goto short_args;
9812
9813       bom = args[coding_arg_utf16_bom];
9814       if (! NILP (bom) && ! EQ (bom, Qt))
9815         {
9816           CHECK_CONS (bom);
9817           val = XCAR (bom);
9818           CHECK_CODING_SYSTEM (val);
9819           val = XCDR (bom);
9820           CHECK_CODING_SYSTEM (val);
9821         }
9822       ASET (attrs, coding_attr_utf_bom, bom);
9823
9824       endian = args[coding_arg_utf16_endian];
9825       CHECK_SYMBOL (endian);
9826       if (NILP (endian))
9827         endian = Qbig;
9828       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9829         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9830       ASET (attrs, coding_attr_utf_16_endian, endian);
9831
9832       category = (CONSP (bom)
9833                   ? coding_category_utf_16_auto
9834                   : NILP (bom)
9835                   ? (EQ (endian, Qbig)
9836                      ? coding_category_utf_16_be_nosig
9837                      : coding_category_utf_16_le_nosig)
9838                   : (EQ (endian, Qbig)
9839                      ? coding_category_utf_16_be
9840                      : coding_category_utf_16_le));
9841     }
9842   else if (EQ (coding_type, Qiso_2022))
9843     {
9844       Lisp_Object initial, reg_usage, request, flags;
9845
9846       if (nargs < coding_arg_iso2022_max)
9847         goto short_args;
9848
9849       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9850       CHECK_VECTOR (initial);
9851       for (i = 0; i < 4; i++)
9852         {
9853           val = Faref (initial, make_number (i));
9854           if (! NILP (val))
9855             {
9856               struct charset *charset;
9857
9858               CHECK_CHARSET_GET_CHARSET (val, charset);
9859               ASET (initial, i, make_number (CHARSET_ID (charset)));
9860               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9861                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9862             }
9863           else
9864             ASET (initial, i, make_number (-1));
9865         }
9866
9867       reg_usage = args[coding_arg_iso2022_reg_usage];
9868       CHECK_CONS (reg_usage);
9869       CHECK_NUMBER_CAR (reg_usage);
9870       CHECK_NUMBER_CDR (reg_usage);
9871
9872       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9873       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9874         {
9875           int id;
9876           Lisp_Object tmp1;
9877
9878           val = Fcar (tail);
9879           CHECK_CONS (val);
9880           tmp1 = XCAR (val);
9881           CHECK_CHARSET_GET_ID (tmp1, id);
9882           CHECK_NATNUM_CDR (val);
9883           if (XINT (XCDR (val)) >= 4)
9884             error ("Invalid graphic register number: %"pI"d", XINT (XCDR (val)));
9885           XSETCAR (val, make_number (id));
9886         }
9887
9888       flags = args[coding_arg_iso2022_flags];
9889       CHECK_NATNUM (flags);
9890       i = XINT (flags);
9891       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9892         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9893
9894       ASET (attrs, coding_attr_iso_initial, initial);
9895       ASET (attrs, coding_attr_iso_usage, reg_usage);
9896       ASET (attrs, coding_attr_iso_request, request);
9897       ASET (attrs, coding_attr_iso_flags, flags);
9898       setup_iso_safe_charsets (attrs);
9899
9900       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9901         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9902                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9903                     ? coding_category_iso_7_else
9904                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9905                     ? coding_category_iso_7
9906                     : coding_category_iso_7_tight);
9907       else
9908         {
9909           int id = XINT (AREF (initial, 1));
9910
9911           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9912                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9913                        || id < 0)
9914                       ? coding_category_iso_8_else
9915                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9916                       ? coding_category_iso_8_1
9917                       : coding_category_iso_8_2);
9918         }
9919       if (category != coding_category_iso_8_1
9920           && category != coding_category_iso_8_2)
9921         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9922     }
9923   else if (EQ (coding_type, Qemacs_mule))
9924     {
9925       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9926         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9927       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9928       category = coding_category_emacs_mule;
9929     }
9930   else if (EQ (coding_type, Qshift_jis))
9931     {
9932
9933       struct charset *charset;
9934
9935       if (XINT (Flength (charset_list)) != 3
9936           && XINT (Flength (charset_list)) != 4)
9937         error ("There should be three or four charsets");
9938
9939       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9940       if (CHARSET_DIMENSION (charset) != 1)
9941         error ("Dimension of charset %s is not one",
9942                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9943       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9944         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9945
9946       charset_list = XCDR (charset_list);
9947       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9948       if (CHARSET_DIMENSION (charset) != 1)
9949         error ("Dimension of charset %s is not one",
9950                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9951
9952       charset_list = XCDR (charset_list);
9953       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9954       if (CHARSET_DIMENSION (charset) != 2)
9955         error ("Dimension of charset %s is not two",
9956                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9957
9958       charset_list = XCDR (charset_list);
9959       if (! NILP (charset_list))
9960         {
9961           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9962           if (CHARSET_DIMENSION (charset) != 2)
9963             error ("Dimension of charset %s is not two",
9964                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9965         }
9966
9967       category = coding_category_sjis;
9968       Vsjis_coding_system = name;
9969     }
9970   else if (EQ (coding_type, Qbig5))
9971     {
9972       struct charset *charset;
9973
9974       if (XINT (Flength (charset_list)) != 2)
9975         error ("There should be just two charsets");
9976
9977       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9978       if (CHARSET_DIMENSION (charset) != 1)
9979         error ("Dimension of charset %s is not one",
9980                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9981       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9982         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9983
9984       charset_list = XCDR (charset_list);
9985       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9986       if (CHARSET_DIMENSION (charset) != 2)
9987         error ("Dimension of charset %s is not two",
9988                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9989
9990       category = coding_category_big5;
9991       Vbig5_coding_system = name;
9992     }
9993   else if (EQ (coding_type, Qraw_text))
9994     {
9995       category = coding_category_raw_text;
9996       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9997     }
9998   else if (EQ (coding_type, Qutf_8))
9999     {
10000       Lisp_Object bom;
10001
10002       if (nargs < coding_arg_utf8_max)
10003         goto short_args;
10004
10005       bom = args[coding_arg_utf8_bom];
10006       if (! NILP (bom) && ! EQ (bom, Qt))
10007         {
10008           CHECK_CONS (bom);
10009           val = XCAR (bom);
10010           CHECK_CODING_SYSTEM (val);
10011           val = XCDR (bom);
10012           CHECK_CODING_SYSTEM (val);
10013         }
10014       ASET (attrs, coding_attr_utf_bom, bom);
10015       if (NILP (bom))
10016         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10017
10018       category = (CONSP (bom) ? coding_category_utf_8_auto
10019                   : NILP (bom) ? coding_category_utf_8_nosig
10020                   : coding_category_utf_8_sig);
10021     }
10022   else if (EQ (coding_type, Qundecided))
10023     category = coding_category_undecided;
10024   else
10025     error ("Invalid coding system type: %s",
10026            SDATA (SYMBOL_NAME (coding_type)));
10027
10028   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10029   CODING_ATTR_PLIST (attrs)
10030     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10031                                 CODING_ATTR_PLIST (attrs)));
10032   CODING_ATTR_PLIST (attrs)
10033     = Fcons (QCascii_compatible_p,
10034              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10035                     CODING_ATTR_PLIST (attrs)));
10036
10037   eol_type = args[coding_arg_eol_type];
10038   if (! NILP (eol_type)
10039       && ! EQ (eol_type, Qunix)
10040       && ! EQ (eol_type, Qdos)
10041       && ! EQ (eol_type, Qmac))
10042     error ("Invalid eol-type");
10043
10044   aliases = Fcons (name, Qnil);
10045
10046   if (NILP (eol_type))
10047     {
10048       eol_type = make_subsidiaries (name);
10049       for (i = 0; i < 3; i++)
10050         {
10051           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10052
10053           this_name = AREF (eol_type, i);
10054           this_aliases = Fcons (this_name, Qnil);
10055           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10056           this_spec = Fmake_vector (make_number (3), attrs);
10057           ASET (this_spec, 1, this_aliases);
10058           ASET (this_spec, 2, this_eol_type);
10059           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10060           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10061           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10062           if (NILP (val))
10063             Vcoding_system_alist
10064               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10065                        Vcoding_system_alist);
10066         }
10067     }
10068
10069   spec_vec = Fmake_vector (make_number (3), attrs);
10070   ASET (spec_vec, 1, aliases);
10071   ASET (spec_vec, 2, eol_type);
10072
10073   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10074   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10075   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10076   if (NILP (val))
10077     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10078                                   Vcoding_system_alist);
10079
10080   {
10081     int id = coding_categories[category].id;
10082
10083     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10084       setup_coding_system (name, &coding_categories[category]);
10085   }
10086
10087   return Qnil;
10088
10089  short_args:
10090   return Fsignal (Qwrong_number_of_arguments,
10091                   Fcons (intern ("define-coding-system-internal"),
10092                          make_number (nargs)));
10093 }
10094
10095
10096 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10097        3, 3, 0,
10098        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10099   (Lisp_Object coding_system, Lisp_Object prop, Lisp_Object val)
10100 {
10101   Lisp_Object spec, attrs;
10102
10103   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10104   attrs = AREF (spec, 0);
10105   if (EQ (prop, QCmnemonic))
10106     {
10107       if (! STRINGP (val))
10108         CHECK_CHARACTER (val);
10109       CODING_ATTR_MNEMONIC (attrs) = val;
10110     }
10111   else if (EQ (prop, QCdefault_char))
10112     {
10113       if (NILP (val))
10114         val = make_number (' ');
10115       else
10116         CHECK_CHARACTER (val);
10117       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10118     }
10119   else if (EQ (prop, QCdecode_translation_table))
10120     {
10121       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10122         CHECK_SYMBOL (val);
10123       CODING_ATTR_DECODE_TBL (attrs) = val;
10124     }
10125   else if (EQ (prop, QCencode_translation_table))
10126     {
10127       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10128         CHECK_SYMBOL (val);
10129       CODING_ATTR_ENCODE_TBL (attrs) = val;
10130     }
10131   else if (EQ (prop, QCpost_read_conversion))
10132     {
10133       CHECK_SYMBOL (val);
10134       CODING_ATTR_POST_READ (attrs) = val;
10135     }
10136   else if (EQ (prop, QCpre_write_conversion))
10137     {
10138       CHECK_SYMBOL (val);
10139       CODING_ATTR_PRE_WRITE (attrs) = val;
10140     }
10141   else if (EQ (prop, QCascii_compatible_p))
10142     {
10143       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10144     }
10145
10146   CODING_ATTR_PLIST (attrs)
10147     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10148   return val;
10149 }
10150
10151
10152 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10153        Sdefine_coding_system_alias, 2, 2, 0,
10154        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10155   (Lisp_Object alias, Lisp_Object coding_system)
10156 {
10157   Lisp_Object spec, aliases, eol_type, val;
10158
10159   CHECK_SYMBOL (alias);
10160   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10161   aliases = AREF (spec, 1);
10162   /* ALIASES should be a list of length more than zero, and the first
10163      element is a base coding system.  Append ALIAS at the tail of the
10164      list.  */
10165   while (!NILP (XCDR (aliases)))
10166     aliases = XCDR (aliases);
10167   XSETCDR (aliases, Fcons (alias, Qnil));
10168
10169   eol_type = AREF (spec, 2);
10170   if (VECTORP (eol_type))
10171     {
10172       Lisp_Object subsidiaries;
10173       int i;
10174
10175       subsidiaries = make_subsidiaries (alias);
10176       for (i = 0; i < 3; i++)
10177         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10178                                      AREF (eol_type, i));
10179     }
10180
10181   Fputhash (alias, spec, Vcoding_system_hash_table);
10182   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10183   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10184   if (NILP (val))
10185     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10186                                   Vcoding_system_alist);
10187
10188   return Qnil;
10189 }
10190
10191 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10192        1, 1, 0,
10193        doc: /* Return the base of CODING-SYSTEM.
10194 Any alias or subsidiary coding system is not a base coding system.  */)
10195   (Lisp_Object coding_system)
10196 {
10197   Lisp_Object spec, attrs;
10198
10199   if (NILP (coding_system))
10200     return (Qno_conversion);
10201   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10202   attrs = AREF (spec, 0);
10203   return CODING_ATTR_BASE_NAME (attrs);
10204 }
10205
10206 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10207        1, 1, 0,
10208        doc: "Return the property list of CODING-SYSTEM.")
10209   (Lisp_Object coding_system)
10210 {
10211   Lisp_Object spec, attrs;
10212
10213   if (NILP (coding_system))
10214     coding_system = Qno_conversion;
10215   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10216   attrs = AREF (spec, 0);
10217   return CODING_ATTR_PLIST (attrs);
10218 }
10219
10220
10221 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10222        1, 1, 0,
10223        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10224   (Lisp_Object coding_system)
10225 {
10226   Lisp_Object spec;
10227
10228   if (NILP (coding_system))
10229     coding_system = Qno_conversion;
10230   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10231   return AREF (spec, 1);
10232 }
10233
10234 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10235        Scoding_system_eol_type, 1, 1, 0,
10236        doc: /* Return eol-type of CODING-SYSTEM.
10237 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10238
10239 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10240 and CR respectively.
10241
10242 A vector value indicates that a format of end-of-line should be
10243 detected automatically.  Nth element of the vector is the subsidiary
10244 coding system whose eol-type is N.  */)
10245   (Lisp_Object coding_system)
10246 {
10247   Lisp_Object spec, eol_type;
10248   int n;
10249
10250   if (NILP (coding_system))
10251     coding_system = Qno_conversion;
10252   if (! CODING_SYSTEM_P (coding_system))
10253     return Qnil;
10254   spec = CODING_SYSTEM_SPEC (coding_system);
10255   eol_type = AREF (spec, 2);
10256   if (VECTORP (eol_type))
10257     return Fcopy_sequence (eol_type);
10258   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10259   return make_number (n);
10260 }
10261
10262 #endif /* emacs */
10263
10264 \f
/*** 12. Postamble ***/
10266
10267 void
10268 init_coding_once (void)
10269 {
10270   int i;
10271
10272   for (i = 0; i < coding_category_max; i++)
10273     {
10274       coding_categories[i].id = -1;
10275       coding_priorities[i] = i;
10276     }
10277
10278   /* ISO2022 specific initialize routine.  */
10279   for (i = 0; i < 0x20; i++)
10280     iso_code_class[i] = ISO_control_0;
10281   for (i = 0x21; i < 0x7F; i++)
10282     iso_code_class[i] = ISO_graphic_plane_0;
10283   for (i = 0x80; i < 0xA0; i++)
10284     iso_code_class[i] = ISO_control_1;
10285   for (i = 0xA1; i < 0xFF; i++)
10286     iso_code_class[i] = ISO_graphic_plane_1;
10287   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10288   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10289   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10290   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10291   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10292   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10293   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10294   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10295   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10296
10297   for (i = 0; i < 256; i++)
10298     {
10299       emacs_mule_bytes[i] = 1;
10300     }
10301   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10302   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10303   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10304   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10305 }
10306
10307 #ifdef emacs
10308
10309 void
10310 syms_of_coding (void)
10311 {
10312   staticpro (&Vcoding_system_hash_table);
10313   {
10314     Lisp_Object args[2];
10315     args[0] = QCtest;
10316     args[1] = Qeq;
10317     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10318   }
10319
10320   staticpro (&Vsjis_coding_system);
10321   Vsjis_coding_system = Qnil;
10322
10323   staticpro (&Vbig5_coding_system);
10324   Vbig5_coding_system = Qnil;
10325
10326   staticpro (&Vcode_conversion_reused_workbuf);
10327   Vcode_conversion_reused_workbuf = Qnil;
10328
10329   staticpro (&Vcode_conversion_workbuf_name);
10330   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10331
10332   reused_workbuf_in_use = 0;
10333
10334   DEFSYM (Qcharset, "charset");
10335   DEFSYM (Qtarget_idx, "target-idx");
10336   DEFSYM (Qcoding_system_history, "coding-system-history");
10337   Fset (Qcoding_system_history, Qnil);
10338
10339   /* Target FILENAME is the first argument.  */
10340   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10341   /* Target FILENAME is the third argument.  */
10342   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10343
10344   DEFSYM (Qcall_process, "call-process");
10345   /* Target PROGRAM is the first argument.  */
10346   Fput (Qcall_process, Qtarget_idx, make_number (0));
10347
10348   DEFSYM (Qcall_process_region, "call-process-region");
10349   /* Target PROGRAM is the third argument.  */
10350   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10351
10352   DEFSYM (Qstart_process, "start-process");
10353   /* Target PROGRAM is the third argument.  */
10354   Fput (Qstart_process, Qtarget_idx, make_number (2));
10355
10356   DEFSYM (Qopen_network_stream, "open-network-stream");
10357   /* Target SERVICE is the fourth argument.  */
10358   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10359
10360   DEFSYM (Qcoding_system, "coding-system");
10361   DEFSYM (Qcoding_aliases, "coding-aliases");
10362
10363   DEFSYM (Qeol_type, "eol-type");
10364   DEFSYM (Qunix, "unix");
10365   DEFSYM (Qdos, "dos");
10366
10367   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10368   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10369   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10370   DEFSYM (Qdefault_char, "default-char");
10371   DEFSYM (Qundecided, "undecided");
10372   DEFSYM (Qno_conversion, "no-conversion");
10373   DEFSYM (Qraw_text, "raw-text");
10374
10375   DEFSYM (Qiso_2022, "iso-2022");
10376
10377   DEFSYM (Qutf_8, "utf-8");
10378   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10379
10380   DEFSYM (Qutf_16, "utf-16");
10381   DEFSYM (Qbig, "big");
10382   DEFSYM (Qlittle, "little");
10383
10384   DEFSYM (Qshift_jis, "shift-jis");
10385   DEFSYM (Qbig5, "big5");
10386
10387   DEFSYM (Qcoding_system_p, "coding-system-p");
10388
10389   DEFSYM (Qcoding_system_error, "coding-system-error");
10390   Fput (Qcoding_system_error, Qerror_conditions,
10391         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10392   Fput (Qcoding_system_error, Qerror_message,
10393         make_pure_c_string ("Invalid coding system"));
10394
10395   /* Intern this now in case it isn't already done.
10396      Setting this variable twice is harmless.
10397      But don't staticpro it here--that is done in alloc.c.  */
10398   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10399
10400   DEFSYM (Qtranslation_table, "translation-table");
10401   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10402   DEFSYM (Qtranslation_table_id, "translation-table-id");
10403   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10404   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10405
10406   DEFSYM (Qvalid_codes, "valid-codes");
10407
10408   DEFSYM (Qemacs_mule, "emacs-mule");
10409
10410   DEFSYM (QCcategory, ":category");
10411   DEFSYM (QCmnemonic, ":mnemonic");
10412   DEFSYM (QCdefault_char, ":default-char");
10413   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10414   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10415   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10416   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10417   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10418
10419   Vcoding_category_table
10420     = Fmake_vector (make_number (coding_category_max), Qnil);
10421   staticpro (&Vcoding_category_table);
10422   /* The following are targets of code detection.  */
10423   ASET (Vcoding_category_table, coding_category_iso_7,
10424         intern_c_string ("coding-category-iso-7"));
10425   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10426         intern_c_string ("coding-category-iso-7-tight"));
10427   ASET (Vcoding_category_table, coding_category_iso_8_1,
10428         intern_c_string ("coding-category-iso-8-1"));
10429   ASET (Vcoding_category_table, coding_category_iso_8_2,
10430         intern_c_string ("coding-category-iso-8-2"));
10431   ASET (Vcoding_category_table, coding_category_iso_7_else,
10432         intern_c_string ("coding-category-iso-7-else"));
10433   ASET (Vcoding_category_table, coding_category_iso_8_else,
10434         intern_c_string ("coding-category-iso-8-else"));
10435   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10436         intern_c_string ("coding-category-utf-8-auto"));
10437   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10438         intern_c_string ("coding-category-utf-8"));
10439   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10440         intern_c_string ("coding-category-utf-8-sig"));
10441   ASET (Vcoding_category_table, coding_category_utf_16_be,
10442         intern_c_string ("coding-category-utf-16-be"));
10443   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10444         intern_c_string ("coding-category-utf-16-auto"));
10445   ASET (Vcoding_category_table, coding_category_utf_16_le,
10446         intern_c_string ("coding-category-utf-16-le"));
10447   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10448         intern_c_string ("coding-category-utf-16-be-nosig"));
10449   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10450         intern_c_string ("coding-category-utf-16-le-nosig"));
10451   ASET (Vcoding_category_table, coding_category_charset,
10452         intern_c_string ("coding-category-charset"));
10453   ASET (Vcoding_category_table, coding_category_sjis,
10454         intern_c_string ("coding-category-sjis"));
10455   ASET (Vcoding_category_table, coding_category_big5,
10456         intern_c_string ("coding-category-big5"));
10457   ASET (Vcoding_category_table, coding_category_ccl,
10458         intern_c_string ("coding-category-ccl"));
10459   ASET (Vcoding_category_table, coding_category_emacs_mule,
10460         intern_c_string ("coding-category-emacs-mule"));
10461   /* The following are NOT targets of code detection.  */
10462   ASET (Vcoding_category_table, coding_category_raw_text,
10463         intern_c_string ("coding-category-raw-text"));
10464   ASET (Vcoding_category_table, coding_category_undecided,
10465         intern_c_string ("coding-category-undecided"));
10466
10467   DEFSYM (Qinsufficient_source, "insufficient-source");
10468   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10469   DEFSYM (Qinvalid_source, "invalid-source");
10470   DEFSYM (Qinterrupted, "interrupted");
10471   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10472   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10473
10474   defsubr (&Scoding_system_p);
10475   defsubr (&Sread_coding_system);
10476   defsubr (&Sread_non_nil_coding_system);
10477   defsubr (&Scheck_coding_system);
10478   defsubr (&Sdetect_coding_region);
10479   defsubr (&Sdetect_coding_string);
10480   defsubr (&Sfind_coding_systems_region_internal);
10481   defsubr (&Sunencodable_char_position);
10482   defsubr (&Scheck_coding_systems_region);
10483   defsubr (&Sdecode_coding_region);
10484   defsubr (&Sencode_coding_region);
10485   defsubr (&Sdecode_coding_string);
10486   defsubr (&Sencode_coding_string);
10487   defsubr (&Sdecode_sjis_char);
10488   defsubr (&Sencode_sjis_char);
10489   defsubr (&Sdecode_big5_char);
10490   defsubr (&Sencode_big5_char);
10491   defsubr (&Sset_terminal_coding_system_internal);
10492   defsubr (&Sset_safe_terminal_coding_system_internal);
10493   defsubr (&Sterminal_coding_system);
10494   defsubr (&Sset_keyboard_coding_system_internal);
10495   defsubr (&Skeyboard_coding_system);
10496   defsubr (&Sfind_operation_coding_system);
10497   defsubr (&Sset_coding_system_priority);
10498   defsubr (&Sdefine_coding_system_internal);
10499   defsubr (&Sdefine_coding_system_alias);
10500   defsubr (&Scoding_system_put);
10501   defsubr (&Scoding_system_base);
10502   defsubr (&Scoding_system_plist);
10503   defsubr (&Scoding_system_aliases);
10504   defsubr (&Scoding_system_eol_type);
10505   defsubr (&Scoding_system_priority_list);
10506
10507   DEFVAR_LISP ("coding-system-list", Vcoding_system_list,
10508                doc: /* List of coding systems.
10509
10510 Do not alter the value of this variable manually.  This variable should be
10511 updated by the functions `define-coding-system' and
10512 `define-coding-system-alias'.  */);
10513   Vcoding_system_list = Qnil;
10514
10515   DEFVAR_LISP ("coding-system-alist", Vcoding_system_alist,
10516                doc: /* Alist of coding system names.
10517 Each element is one element list of coding system name.
10518 This variable is given to `completing-read' as COLLECTION argument.
10519
10520 Do not alter the value of this variable manually.  This variable should be
10521 updated by the functions `make-coding-system' and
10522 `define-coding-system-alias'.  */);
10523   Vcoding_system_alist = Qnil;
10524
10525   DEFVAR_LISP ("coding-category-list", Vcoding_category_list,
10526                doc: /* List of coding-categories (symbols) ordered by priority.
10527
10528 On detecting a coding system, Emacs tries code detection algorithms
10529 associated with each coding-category one by one in this order.  When
10530 one algorithm agrees with a byte sequence of source text, the coding
10531 system bound to the corresponding coding-category is selected.
10532
10533 Don't modify this variable directly, but use `set-coding-system-priority'.  */);
10534   {
10535     int i;
10536
10537     Vcoding_category_list = Qnil;
10538     for (i = coding_category_max - 1; i >= 0; i--)
10539       Vcoding_category_list
10540         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10541                  Vcoding_category_list);
10542   }
10543
10544   DEFVAR_LISP ("coding-system-for-read", Vcoding_system_for_read,
10545                doc: /* Specify the coding system for read operations.
10546 It is useful to bind this variable with `let', but do not set it globally.
10547 If the value is a coding system, it is used for decoding on read operation.
10548 If not, an appropriate element is used from one of the coding system alists.
10549 There are three such tables: `file-coding-system-alist',
10550 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10551   Vcoding_system_for_read = Qnil;
10552
10553   DEFVAR_LISP ("coding-system-for-write", Vcoding_system_for_write,
10554                doc: /* Specify the coding system for write operations.
10555 Programs bind this variable with `let', but you should not set it globally.
10556 If the value is a coding system, it is used for encoding of output,
10557 when writing it to a file and when sending it to a file or subprocess.
10558
10559 If this does not specify a coding system, an appropriate element
10560 is used from one of the coding system alists.
10561 There are three such tables: `file-coding-system-alist',
10562 `process-coding-system-alist', and `network-coding-system-alist'.
10563 For output to files, if the above procedure does not specify a coding system,
10564 the value of `buffer-file-coding-system' is used.  */);
10565   Vcoding_system_for_write = Qnil;
10566
10567   DEFVAR_LISP ("last-coding-system-used", Vlast_coding_system_used,
10568                doc: /*
10569 Coding system used in the latest file or process I/O.  */);
10570   Vlast_coding_system_used = Qnil;
10571
10572   DEFVAR_LISP ("last-code-conversion-error", Vlast_code_conversion_error,
10573                doc: /*
10574 Error status of the last code conversion.
10575
10576 When an error was detected in the last code conversion, this variable
10577 is set to one of the following symbols.
10578   `insufficient-source'
10579   `inconsistent-eol'
10580   `invalid-source'
10581   `interrupted'
10582   `insufficient-memory'
10583 When no error was detected, the value doesn't change.  So, to check
10584 the error status of a code conversion by this variable, you must
10585 explicitly set this variable to nil before performing code
10586 conversion.  */);
10587   Vlast_code_conversion_error = Qnil;
10588
10589   DEFVAR_BOOL ("inhibit-eol-conversion", inhibit_eol_conversion,
10590                doc: /*
10591 *Non-nil means always inhibit code conversion of end-of-line format.
10592 See info node `Coding Systems' and info node `Text and Binary' concerning
10593 such conversion.  */);
10594   inhibit_eol_conversion = 0;
10595
10596   DEFVAR_BOOL ("inherit-process-coding-system", inherit_process_coding_system,
10597                doc: /*
10598 Non-nil means process buffer inherits coding system of process output.
10599 Bind it to t if the process output is to be treated as if it were a file
10600 read from some filesystem.  */);
10601   inherit_process_coding_system = 0;
10602
10603   DEFVAR_LISP ("file-coding-system-alist", Vfile_coding_system_alist,
10604                doc: /*
10605 Alist to decide a coding system to use for a file I/O operation.
10606 The format is ((PATTERN . VAL) ...),
10607 where PATTERN is a regular expression matching a file name,
10608 VAL is a coding system, a cons of coding systems, or a function symbol.
10609 If VAL is a coding system, it is used for both decoding and encoding
10610 the file contents.
10611 If VAL is a cons of coding systems, the car part is used for decoding,
10612 and the cdr part is used for encoding.
10613 If VAL is a function symbol, the function must return a coding system
10614 or a cons of coding systems which are used as above.  The function is
10615 called with an argument that is a list of the arguments with which
10616 `find-operation-coding-system' was called.  If the function can't decide
10617 a coding system, it can return `undecided' so that the normal
10618 code-detection is performed.
10619
10620 See also the function `find-operation-coding-system'
10621 and the variable `auto-coding-alist'.  */);
10622   Vfile_coding_system_alist = Qnil;
10623
10624   DEFVAR_LISP ("process-coding-system-alist", Vprocess_coding_system_alist,
10625                doc: /*
10626 Alist to decide a coding system to use for a process I/O operation.
10627 The format is ((PATTERN . VAL) ...),
10628 where PATTERN is a regular expression matching a program name,
10629 VAL is a coding system, a cons of coding systems, or a function symbol.
10630 If VAL is a coding system, it is used for both decoding what received
10631 from the program and encoding what sent to the program.
10632 If VAL is a cons of coding systems, the car part is used for decoding,
10633 and the cdr part is used for encoding.
10634 If VAL is a function symbol, the function must return a coding system
10635 or a cons of coding systems which are used as above.
10636
10637 See also the function `find-operation-coding-system'.  */);
10638   Vprocess_coding_system_alist = Qnil;
10639
10640   DEFVAR_LISP ("network-coding-system-alist", Vnetwork_coding_system_alist,
10641                doc: /*
10642 Alist to decide a coding system to use for a network I/O operation.
10643 The format is ((PATTERN . VAL) ...),
10644 where PATTERN is a regular expression matching a network service name
10645 or is a port number to connect to,
10646 VAL is a coding system, a cons of coding systems, or a function symbol.
10647 If VAL is a coding system, it is used for both decoding what received
10648 from the network stream and encoding what sent to the network stream.
10649 If VAL is a cons of coding systems, the car part is used for decoding,
10650 and the cdr part is used for encoding.
10651 If VAL is a function symbol, the function must return a coding system
10652 or a cons of coding systems which are used as above.
10653
10654 See also the function `find-operation-coding-system'.  */);
10655   Vnetwork_coding_system_alist = Qnil;
10656
10657   DEFVAR_LISP ("locale-coding-system", Vlocale_coding_system,
10658                doc: /* Coding system to use with system messages.
10659 Also used for decoding keyboard input on X Window system.  */);
10660   Vlocale_coding_system = Qnil;
10661
10662   /* The eol mnemonics are reset in startup.el system-dependently.  */
10663   DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix,
10664                doc: /*
10665 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10666   eol_mnemonic_unix = make_pure_c_string (":");
10667
10668   DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos,
10669                doc: /*
10670 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10671   eol_mnemonic_dos = make_pure_c_string ("\\");
10672
10673   DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac,
10674                doc: /*
10675 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10676   eol_mnemonic_mac = make_pure_c_string ("/");
10677
10678   DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided,
10679                doc: /*
10680 *String displayed in mode line when end-of-line format is not yet determined.  */);
10681   eol_mnemonic_undecided = make_pure_c_string (":");
10682
10683   DEFVAR_LISP ("enable-character-translation", Venable_character_translation,
10684                doc: /*
10685 *Non-nil enables character translation while encoding and decoding.  */);
10686   Venable_character_translation = Qt;
10687
10688   DEFVAR_LISP ("standard-translation-table-for-decode",
10689                Vstandard_translation_table_for_decode,
10690                doc: /* Table for translating characters while decoding.  */);
10691   Vstandard_translation_table_for_decode = Qnil;
10692
10693   DEFVAR_LISP ("standard-translation-table-for-encode",
10694                Vstandard_translation_table_for_encode,
10695                doc: /* Table for translating characters while encoding.  */);
10696   Vstandard_translation_table_for_encode = Qnil;
10697
10698   DEFVAR_LISP ("charset-revision-table", Vcharset_revision_table,
10699                doc: /* Alist of charsets vs revision numbers.
10700 While encoding, if a charset (car part of an element) is found,
10701 designate it with the escape sequence identifying revision (cdr part
10702 of the element).  */);
10703   Vcharset_revision_table = Qnil;
10704
10705   DEFVAR_LISP ("default-process-coding-system",
10706                Vdefault_process_coding_system,
10707                doc: /* Cons of coding systems used for process I/O by default.
10708 The car part is used for decoding a process output,
10709 the cdr part is used for encoding a text to be sent to a process.  */);
10710   Vdefault_process_coding_system = Qnil;
10711
10712   DEFVAR_LISP ("latin-extra-code-table", Vlatin_extra_code_table,
10713                doc: /*
10714 Table of extra Latin codes in the range 128..159 (inclusive).
10715 This is a vector of length 256.
10716 If Nth element is non-nil, the existence of code N in a file
10717 \(or output of subprocess) doesn't prevent it to be detected as
10718 a coding system of ISO 2022 variant which has a flag
10719 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10720 or reading output of a subprocess.
10721 Only 128th through 159th elements have a meaning.  */);
10722   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10723
10724   DEFVAR_LISP ("select-safe-coding-system-function",
10725                Vselect_safe_coding_system_function,
10726                doc: /*
10727 Function to call to select safe coding system for encoding a text.
10728
10729 If set, this function is called to force a user to select a proper
10730 coding system which can encode the text in the case that a default
10731 coding system used in each operation can't encode the text.  The
10732 function should take care that the buffer is not modified while
10733 the coding system is being selected.
10734
10735 The default value is `select-safe-coding-system' (which see).  */);
10736   Vselect_safe_coding_system_function = Qnil;
10737
10738   DEFVAR_BOOL ("coding-system-require-warning",
10739                coding_system_require_warning,
10740                doc: /* Internal use only.
10741 If non-nil, on writing a file, `select-safe-coding-system-function' is
10742 called even if `coding-system-for-write' is non-nil.  The command
10743 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10744   coding_system_require_warning = 0;
10745
10746
10747   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10748                inhibit_iso_escape_detection,
10749                doc: /*
10750 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10751
10752 When Emacs reads text, it tries to detect how the text is encoded.
10753 This code detection is sensitive to escape sequences.  If Emacs sees
10754 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10755 of the ISO2022 encodings, and decodes text by the corresponding coding
10756 system (e.g. `iso-2022-7bit').
10757
10758 However, there may be a case that you want to read escape sequences in
10759 a file as is.  In such a case, you can set this variable to non-nil.
10760 Then the code detection will ignore any escape sequences, and no text is
10761 detected as encoded in some ISO-2022 encoding.  The result is that all
10762 escape sequences become visible in a buffer.
10763
10764 The default value is nil, and it is strongly recommended not to change
10765 it.  That is because many Emacs Lisp source files that contain
10766 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10767 in Emacs's distribution, and they won't be decoded correctly on
10768 reading if you suppress escape sequence detection.
10769
10770 The other way to read escape sequences in a file without decoding is
10771 to explicitly specify some coding system that doesn't use ISO-2022
10772 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10773   inhibit_iso_escape_detection = 0;
10774
10775   DEFVAR_BOOL ("inhibit-null-byte-detection",
10776                inhibit_null_byte_detection,
10777                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10778 By default, Emacs treats it as binary data, and does not attempt to
10779 decode it.  The effect is as if you specified `no-conversion' for
10780 reading that text.
10781
10782 Set this to non-nil when a regular text happens to include null bytes.
10783 Examples are Index nodes of Info files and null-byte delimited output
10784 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10785 decode text as usual.  */);
10786   inhibit_null_byte_detection = 0;
10787
10788   DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
10789                doc: /* Char table for translating self-inserting characters.
10790 This is applied to the result of input methods, not their input.
10791 See also `keyboard-translate-table'.
10792
10793 Use of this variable for character code unification was rendered
10794 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10795 internal character representation.  */);
10796     Vtranslation_table_for_input = Qnil;
10797
10798   {
10799     Lisp_Object args[coding_arg_max];
10800     Lisp_Object plist[16];
10801     int i;
10802
10803     for (i = 0; i < coding_arg_max; i++)
10804       args[i] = Qnil;
10805
10806     plist[0] = intern_c_string (":name");
10807     plist[1] = args[coding_arg_name] = Qno_conversion;
10808     plist[2] = intern_c_string (":mnemonic");
10809     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10810     plist[4] = intern_c_string (":coding-type");
10811     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10812     plist[6] = intern_c_string (":ascii-compatible-p");
10813     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10814     plist[8] = intern_c_string (":default-char");
10815     plist[9] = args[coding_arg_default_char] = make_number (0);
10816     plist[10] = intern_c_string (":for-unibyte");
10817     plist[11] = args[coding_arg_for_unibyte] = Qt;
10818     plist[12] = intern_c_string (":docstring");
10819     plist[13] = make_pure_c_string ("Do no conversion.\n\
10820 \n\
10821 When you visit a file with this coding, the file is read into a\n\
10822 unibyte buffer as is, thus each byte of a file is treated as a\n\
10823 character.");
10824     plist[14] = intern_c_string (":eol-type");
10825     plist[15] = args[coding_arg_eol_type] = Qunix;
10826     args[coding_arg_plist] = Flist (16, plist);
10827     Fdefine_coding_system_internal (coding_arg_max, args);
10828
10829     plist[1] = args[coding_arg_name] = Qundecided;
10830     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10831     plist[5] = args[coding_arg_coding_type] = Qundecided;
10832     /* This is already set.
10833        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10834     plist[8] = intern_c_string (":charset-list");
10835     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10836     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10837     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10838     plist[15] = args[coding_arg_eol_type] = Qnil;
10839     args[coding_arg_plist] = Flist (16, plist);
10840     Fdefine_coding_system_internal (coding_arg_max, args);
10841   }
10842
10843   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10844
10845   {
10846     int i;
10847
10848     for (i = 0; i < coding_category_max; i++)
10849       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10850   }
10851 #if defined (DOS_NT)
10852   system_eol_type = Qdos;
10853 #else
10854   system_eol_type = Qunix;
10855 #endif
10856   staticpro (&system_eol_type);
10857 }
10858
10859 char *
10860 emacs_strerror (int error_number)
10861 {
10862   char *str;
10863
10864   synchronize_system_messages_locale ();
10865   str = strerror (error_number);
10866
10867   if (! NILP (Vlocale_coding_system))
10868     {
10869       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10870                                                       Vlocale_coding_system,
10871                                                       0);
10872       str = SSDATA (dec);
10873     }
10874
10875   return str;
10876 }
10877
10878 #endif /* emacs */