src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009, 2010
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  At
  60   the C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from coding system symbol to attributes vector is done by looking up
  63   Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we set up a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (struct coding_system *coding,
 158                    struct coding_detection_info *detect_info)
 159 {
 160   const unsigned char *src = coding->source;
 161   const unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the source is exhausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (__C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source was exhausted without finding an invalid byte.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size.
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (struct coding_system *coding)
 206 {
 207   const unsigned char *src = coding->source + coding->consumed;
 208   const unsigned char *src_end = coding->source + coding->src_bytes;
 209   /* SRC_BASE remembers the start position in source in each loop.
 210      The loop will be exited when there's not enough source code, or
 211      when there's no room in CHARBUF for a decoded character.  */
 212   const unsigned char *src_base;
 213   /* A buffer to produce decoded characters.  */
 214   int *charbuf = coding->charbuf + coding->charbuf_used;
 215   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 216   int multibytep = coding->src_multibyte;
 217
 218   while (1)
 219     {
 220       src_base = src;
 221       if (charbuf >= charbuf_end)
 222         /* No more room to produce a decoded character.  */
 223         break;
 224       ONE_MORE_BYTE (c);
 225       /* Decode it. */
 226     }
 227
 228  no_more_source:
 229   if (src_base < src_end
 230       && coding->mode & CODING_MODE_LAST_BLOCK)
 231     /* If the source ends by partial bytes to construct a character,
 232        treat them as eight-bit raw data.  */
 233     while (src_base < src_end && charbuf < charbuf_end)
 234       *charbuf++ = *src_base++;
 235   /* Remember how many bytes and characters we consumed.  If the
 236      source is multibyte, the bytes and chars are not identical.  */
 237   coding->consumed = coding->consumed_char = src_base - coding->source;
 238   /* Remember how many characters we produced.  */
 239   coding->charbuf_used = charbuf - coding->charbuf;
 240 }
 241 #endif
 242
 243 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 244
 245   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 246   internal multibyte format by CODING.  The resulting byte sequence
 247   goes to a place pointed to by DESTINATION, the length of which
 248   should not exceed DST_BYTES.
 249
 250   These functions set the information of original and encoded texts in
 251   the members produced, produced_char, consumed, and consumed_char of
 252   the structure *CODING.  They also set the member result to one of
 253   CODING_RESULT_XXX indicating how the encoding finished.
 254
 255   DST_BYTES zero means that source area and destination area are
 256   overlapped, which means that we can produce an encoded text until it
 257   reaches the head of not-yet-encoded source text.
 258
 259   Below is a template of these functions.  */
 260 #if 0
 261 static void
 262 encode_coding_XXX (struct coding_system *coding)
 263 {
 264   int multibytep = coding->dst_multibyte;
 265   int *charbuf = coding->charbuf;
 266   int *charbuf_end = coding->charbuf + coding->charbuf_used;
 267   unsigned char *dst = coding->destination + coding->produced;
 268   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 269   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 270   int produced_chars = 0;
 271
 272   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 273     {
 274       int c = *charbuf;
 275       /* Encode C into DST, and increment DST.  */
 276     }
 277  label_no_more_destination:
 278   /* How many chars and bytes we produced.  */
 279   coding->produced_char += produced_chars;
 280   coding->produced = dst - coding->destination;
 281 }
 282 #endif
 283
 284 \f
 285 /*** 1. Preamble ***/
 286
 287 #include <config.h>
 288 #include <stdio.h>
 289 #include <setjmp.h>
 290
 291 #include "lisp.h"
 292 #include "buffer.h"
 293 #include "character.h"
 294 #include "charset.h"
 295 #include "ccl.h"
 296 #include "composite.h"
 297 #include "coding.h"
 298 #include "window.h"
 299 #include "frame.h"
 300 #include "termhooks.h"
 301
 302 Lisp_Object Vcoding_system_hash_table;
 303
 304 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 305 Lisp_Object Qunix, Qdos;
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 315 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 316 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 317 Lisp_Object QCascii_compatible_p;
 318
 319 Lisp_Object Qcall_process, Qcall_process_region;
 320 Lisp_Object Qstart_process, Qopen_network_stream;
 321 Lisp_Object Qtarget_idx;
 322
 323 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 324 Lisp_Object Qinterrupted, Qinsufficient_memory;
 325
 326 /* If a symbol has this property, evaluate the value to define the
 327    symbol as a coding system.  */
 328 static Lisp_Object Qcoding_system_define_form;
 329
 330 int coding_system_require_warning;
 331
 332 Lisp_Object Vselect_safe_coding_system_function;
 333
 334 /* Mnemonic string for each format of end-of-line.  */
 335 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 336 /* Mnemonic string to indicate format of end-of-line is not yet
 337    decided.  */
 338 Lisp_Object eol_mnemonic_undecided;
 339
 340 /* Format of end-of-line decided by system.  This is Qunix on
 341    Unix and Mac, Qdos on DOS/Windows.
 342    This has an effect only for external encoding (i.e. for output to
 343    file and process), not for in-buffer or Lisp string encoding.  */
 344 static Lisp_Object system_eol_type;
 345
 346 #ifdef emacs
 347
 348 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 349
 350 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 351
 352 /* Coding system emacs-mule and raw-text are for converting only
 353    end-of-line format.  */
 354 Lisp_Object Qemacs_mule, Qraw_text;
 355 Lisp_Object Qutf_8_emacs;
 356
 357 /* Coding-systems are handed between Emacs Lisp programs and C internal
 358    routines by the following three variables.  */
 359 /* Coding-system for reading files and receiving data from process.  */
 360 Lisp_Object Vcoding_system_for_read;
 361 /* Coding-system for writing files and sending data to process.  */
 362 Lisp_Object Vcoding_system_for_write;
 363 /* Coding-system actually used in the latest I/O.  */
 364 Lisp_Object Vlast_coding_system_used;
 365 /* Set to non-nil when an error is detected while code conversion.  */
 366 Lisp_Object Vlast_code_conversion_error;
 367 /* A vector of length 256 which contains information about special
 368    Latin codes (especially for dealing with Microsoft codes).  */
 369 Lisp_Object Vlatin_extra_code_table;
 370
 371 /* Flag to inhibit code conversion of end-of-line format.  */
 372 int inhibit_eol_conversion;
 373
 374 /* Flag to inhibit ISO2022 escape sequence detection.  */
 375 int inhibit_iso_escape_detection;
 376
 377 /* Flag to inhibit detection of binary files through null bytes.  */
 378 int inhibit_null_byte_detection;
 379
 380 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 381 int inherit_process_coding_system;
 382
 383 /* Coding system to be used to encode text for terminal display when
 384    terminal coding system is nil.  */
 385 struct coding_system safe_terminal_coding;
 386
 387 Lisp_Object Vfile_coding_system_alist;
 388 Lisp_Object Vprocess_coding_system_alist;
 389 Lisp_Object Vnetwork_coding_system_alist;
 390
 391 Lisp_Object Vlocale_coding_system;
 392
 393 #endif /* emacs */
 394
 395 /* Flag to tell if we look up translation table on character code
 396    conversion.  */
 397 Lisp_Object Venable_character_translation;
 398 /* Standard translation table to look up on decoding (reading).  */
 399 Lisp_Object Vstandard_translation_table_for_decode;
 400 /* Standard translation table to look up on encoding (writing).  */
 401 Lisp_Object Vstandard_translation_table_for_encode;
 402
 403 Lisp_Object Qtranslation_table;
 404 Lisp_Object Qtranslation_table_id;
 405 Lisp_Object Qtranslation_table_for_decode;
 406 Lisp_Object Qtranslation_table_for_encode;
 407
 408 /* Alist of charsets vs revision number.  */
 409 static Lisp_Object Vcharset_revision_table;
 410
 411 /* Default coding systems used for process I/O.  */
 412 Lisp_Object Vdefault_process_coding_system;
 413
 414 /* Char table for translating Quail and self-inserting input.  */
 415 Lisp_Object Vtranslation_table_for_input;
 416
 417 /* Two special coding systems.  */
 418 Lisp_Object Vsjis_coding_system;
 419 Lisp_Object Vbig5_coding_system;
 420
 421 /* ISO2022 section */
 422
 423 #define CODING_ISO_INITIAL(coding, reg)                 \
 424   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 425                      coding_attr_iso_initial),          \
 426                reg)))
 427
 428
 429 #define CODING_ISO_REQUEST(coding, charset_id)          \
 430   (((charset_id) <= (coding)->max_charset_id            \
 431     ? ((coding)->safe_charsets[charset_id] != 255       \
 432        ? (coding)->safe_charsets[charset_id]            \
 433        : -1)                                            \
 434     : -1))
 435
 436
 437 #define CODING_ISO_FLAGS(coding)        \
 438   ((coding)->spec.iso_2022.flags)
 439 #define CODING_ISO_DESIGNATION(coding, reg)     \
 440   ((coding)->spec.iso_2022.current_designation[reg])
 441 #define CODING_ISO_INVOCATION(coding, plane)    \
 442   ((coding)->spec.iso_2022.current_invocation[plane])
 443 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 444   ((coding)->spec.iso_2022.single_shifting)
 445 #define CODING_ISO_BOL(coding)  \
 446   ((coding)->spec.iso_2022.bol)
 447 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 448   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 449 #define CODING_ISO_CMP_STATUS(coding)   \
 450   (&(coding)->spec.iso_2022.cmp_status)
 451 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 452   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 453 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 454   ((coding)->spec.iso_2022.embedded_utf_8)
 455
 456 /* Control characters of ISO2022.  */
 457                         /* code */      /* function */
 458 #define ISO_CODE_LF     0x0A            /* line-feed */
 459 #define ISO_CODE_CR     0x0D            /* carriage-return */
 460 #define ISO_CODE_SO     0x0E            /* shift-out */
 461 #define ISO_CODE_SI     0x0F            /* shift-in */
 462 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 463 #define ISO_CODE_ESC    0x1B            /* escape */
 464 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 465 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 466 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 467
 468 /* Every 1-byte code of ISO2022 is classified into one of the
 469    following classes.  */
 470 enum iso_code_class_type
 471   {
 472     ISO_control_0,              /* Control codes in the range
 473                                    0x00..0x1F and 0x7F, except for the
 474                                    following 4 codes.  */
 475     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 476     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 477     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 478     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 479     ISO_control_1,              /* Control codes in the range
 480                                    0x80..0x9F, except for the
 481                                    following 3 codes.  */
 482     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 483     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 484     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 485     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 486     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 487     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 488     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 489   };
 490
 491 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
 492     `iso-flags' attribute of an iso2022 coding system.  */
 493
 494 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 495    instead of the correct short-form sequence (e.g. ESC $ A).  */
 496 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 497
 498 /* If set, reset graphic planes and registers at end-of-line to the
 499    initial state.  */
 500 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 501
 502 /* If set, reset graphic planes and registers before any control
 503    characters to the initial state.  */
 504 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 505
 506 /* If set, encode by 7-bit environment.  */
 507 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 508
 509 /* If set, use locking-shift function.  */
 510 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 511
 512 /* If set, use single-shift function.  Overwrite
 513    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 514 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 515
 516 /* If set, use designation escape sequence.  */
 517 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 518
 519 /* If set, produce revision number sequence.  */
 520 #define CODING_ISO_FLAG_REVISION        0x0080
 521
 522 /* If set, produce ISO6429's direction specifying sequence.  */
 523 #define CODING_ISO_FLAG_DIRECTION       0x0100
 524
 525 /* If set, assume designation states are reset at beginning of line on
 526    output.  */
 527 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 528
 529 /* If set, designation sequence should be placed at beginning of line
 530    on output.  */
 531 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 532
 533 /* If set, do not encode unsafe characters on output.  */
 534 #define CODING_ISO_FLAG_SAFE            0x0800
 535
 536 /* If set, extra latin codes (128..159) are accepted as a valid code
 537    on input.  */
 538 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 539
 540 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 541
 542 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 543
 544 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 545
 546 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 547
 548 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 549
 550 /* A character to be produced on output if encoding of the original
 551    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 552 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 553
 554 /* UTF-8 section */
 555 #define CODING_UTF_8_BOM(coding)        \
 556   ((coding)->spec.utf_8_bom)
 557
 558 /* UTF-16 section */
 559 #define CODING_UTF_16_BOM(coding)       \
 560   ((coding)->spec.utf_16.bom)
 561
 562 #define CODING_UTF_16_ENDIAN(coding)    \
 563   ((coding)->spec.utf_16.endian)
 564
 565 #define CODING_UTF_16_SURROGATE(coding) \
 566   ((coding)->spec.utf_16.surrogate)
 567
 568
 569 /* CCL section */
 570 #define CODING_CCL_DECODER(coding)      \
 571   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 572 #define CODING_CCL_ENCODER(coding)      \
 573   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 574 #define CODING_CCL_VALIDS(coding)                                          \
 575   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 576
 577 /* Index for each coding category in `coding_categories'.  */
 578
 579 enum coding_category
 580   {
 581     coding_category_iso_7,
 582     coding_category_iso_7_tight,
 583     coding_category_iso_8_1,
 584     coding_category_iso_8_2,
 585     coding_category_iso_7_else,
 586     coding_category_iso_8_else,
 587     coding_category_utf_8_auto,
 588     coding_category_utf_8_nosig,
 589     coding_category_utf_8_sig,
 590     coding_category_utf_16_auto,
 591     coding_category_utf_16_be,
 592     coding_category_utf_16_le,
 593     coding_category_utf_16_be_nosig,
 594     coding_category_utf_16_le_nosig,
 595     coding_category_charset,
 596     coding_category_sjis,
 597     coding_category_big5,
 598     coding_category_ccl,
 599     coding_category_emacs_mule,
 600     /* All above are targets of code detection.  */
 601     coding_category_raw_text,
 602     coding_category_undecided,
 603     coding_category_max
 604   };
 605
 606 /* Definitions of flag bits used in detect_coding_XXXX.  */
 607 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 608 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 609 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 610 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 611 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 612 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 613 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 614 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 615 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 616 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 617 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 618 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 619 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 620 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 621 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 622 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 623 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 624 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 625 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 626 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 627
 628 /* This value is returned if detect_coding_mask () finds nothing other
 629    than ASCII characters.  */
 630 #define CATEGORY_MASK_ANY               \
 631   (CATEGORY_MASK_ISO_7                  \
 632    | CATEGORY_MASK_ISO_7_TIGHT          \
 633    | CATEGORY_MASK_ISO_8_1              \
 634    | CATEGORY_MASK_ISO_8_2              \
 635    | CATEGORY_MASK_ISO_7_ELSE           \
 636    | CATEGORY_MASK_ISO_8_ELSE           \
 637    | CATEGORY_MASK_UTF_8_AUTO           \
 638    | CATEGORY_MASK_UTF_8_NOSIG          \
 639    | CATEGORY_MASK_UTF_8_SIG            \
 640    | CATEGORY_MASK_UTF_16_AUTO          \
 641    | CATEGORY_MASK_UTF_16_BE            \
 642    | CATEGORY_MASK_UTF_16_LE            \
 643    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 644    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 645    | CATEGORY_MASK_CHARSET              \
 646    | CATEGORY_MASK_SJIS                 \
 647    | CATEGORY_MASK_BIG5                 \
 648    | CATEGORY_MASK_CCL                  \
 649    | CATEGORY_MASK_EMACS_MULE)
 650
 651
 652 #define CATEGORY_MASK_ISO_7BIT \
 653   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 654
 655 #define CATEGORY_MASK_ISO_8BIT \
 656   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 657
 658 #define CATEGORY_MASK_ISO_ELSE \
 659   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 660
 661 #define CATEGORY_MASK_ISO_ESCAPE        \
 662   (CATEGORY_MASK_ISO_7                  \
 663    | CATEGORY_MASK_ISO_7_TIGHT          \
 664    | CATEGORY_MASK_ISO_7_ELSE           \
 665    | CATEGORY_MASK_ISO_8_ELSE)
 666
 667 #define CATEGORY_MASK_ISO       \
 668   (  CATEGORY_MASK_ISO_7BIT     \
 669      | CATEGORY_MASK_ISO_8BIT   \
 670      | CATEGORY_MASK_ISO_ELSE)
 671
 672 #define CATEGORY_MASK_UTF_16            \
 673   (CATEGORY_MASK_UTF_16_AUTO            \
 674    | CATEGORY_MASK_UTF_16_BE            \
 675    | CATEGORY_MASK_UTF_16_LE            \
 676    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 677    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 678
 679 #define CATEGORY_MASK_UTF_8     \
 680   (CATEGORY_MASK_UTF_8_AUTO     \
 681    | CATEGORY_MASK_UTF_8_NOSIG  \
 682    | CATEGORY_MASK_UTF_8_SIG)
 683
 684 /* List of symbols `coding-category-xxx' ordered by priority.  This
 685    variable is exposed to Emacs Lisp.  */
 686 static Lisp_Object Vcoding_category_list;
 687
 688 /* Table of coding categories (Lisp symbols).  This variable is for
 689    internal use only.  */
 690 static Lisp_Object Vcoding_category_table;
 691
 692 /* Table of coding-categories ordered by priority.  */
 693 static enum coding_category coding_priorities[coding_category_max];
 694
 695 /* Nth element is a coding context for the coding system bound to the
 696    Nth coding category.  */
 697 static struct coding_system coding_categories[coding_category_max];
 698
 699 /*** Commonly used macros and functions ***/
 700
 701 #ifndef min
 702 #define min(a, b) ((a) < (b) ? (a) : (b))
 703 #endif
 704 #ifndef max
 705 #define max(a, b) ((a) > (b) ? (a) : (b))
 706 #endif
 707
 708 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 709   do {                                                  \
 710     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 711     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 712   } while (0)
 713
 714
 715 /* Safely get one byte from the source text pointed to by SRC which
 716    ends at SRC_END, and set C to that byte.  If there are not enough
 717    bytes in the source, it jumps to `no_more_source'.  If multibytep is
 718    nonzero, and a multibyte character is found at SRC, set C to the
 719    negative value of the character code.  The caller should declare
 720    and set these variables appropriately in advance:
 721         src, src_end, src_base, multibytep, consumed_chars, coding */
 722
 723 #define ONE_MORE_BYTE(c)                                \
 724   do {                                                  \
 725     if (src == src_end)                                 \
 726       {                                                 \
 727         if (src_base < src)                             \
 728           record_conversion_result                      \
 729             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 730         goto no_more_source;                            \
 731       }                                                 \
 732     c = *src++;                                         \
 733     if (multibytep && (c & 0x80))                       \
 734       {                                                 \
 735         if ((c & 0xFE) == 0xC0)                         \
 736           c = ((c & 1) << 6) | *src++;                  \
 737         else                                            \
 738           {                                             \
 739             src--;                                      \
 740             c = - string_char (src, &src, NULL);        \
 741             record_conversion_result                    \
 742               (coding, CODING_RESULT_INVALID_SRC);      \
 743           }                                             \
 744       }                                                 \
 745     consumed_chars++;                                   \
 746   } while (0)
 747
 748 /* Safely get two bytes from the source text pointed to by SRC which
 749    ends at SRC_END, and set C1 and C2 to those bytes while skipping the
 750    heading multibyte characters.  If there are not enough bytes in the
 751    source, it jumps to `no_more_source'.  If multibytep is nonzero and
 752    a multibyte character is found for C2, set C2 to -1 (the character
 753    code itself is not decoded).  The caller should declare and set these
 754    variables appropriately in advance:
 755         src, src_end, multibytep
 756    It is intended that this macro is used in detect_coding_utf_16.  */
 757
 758 #define TWO_MORE_BYTES(c1, c2)                          \
 759   do {                                                  \
 760     do {                                                \
 761       if (src == src_end)                               \
 762         goto no_more_source;                            \
 763       c1 = *src++;                                      \
 764       if (multibytep && (c1 & 0x80))                    \
 765         {                                               \
 766           if ((c1 & 0xFE) == 0xC0)                      \
 767             c1 = ((c1 & 1) << 6) | *src++;              \
 768           else                                          \
 769             {                                           \
 770               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 771               c1 = -1;                                  \
 772             }                                           \
 773         }                                               \
 774     } while (c1 < 0);                                   \
 775     if (src == src_end)                                 \
 776       goto no_more_source;                              \
 777     c2 = *src++;                                        \
 778     if (multibytep && (c2 & 0x80))                      \
 779       {                                                 \
 780         if ((c2 & 0xFE) == 0xC0)                        \
 781           c2 = ((c2 & 1) << 6) | *src++;                \
 782         else                                            \
 783           c2 = -1;                                      \
 784       }                                                 \
 785   } while (0)
 786
 787
 788 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 789   do {                                                  \
 790     c = *src++;                                         \
 791     if (multibytep && (c & 0x80))                       \
 792       {                                                 \
 793         if ((c & 0xFE) == 0xC0)                         \
 794           c = ((c & 1) << 6) | *src++;                  \
 795         else                                            \
 796           {                                             \
 797             src--;                                      \
 798             c = - string_char (src, &src, NULL);        \
 799             record_conversion_result                    \
 800               (coding, CODING_RESULT_INVALID_SRC);      \
 801           }                                             \
 802       }                                                 \
 803     consumed_chars++;                                   \
 804   } while (0)
 805
 806
 807 /* Store a byte C in the place pointed by DST and increment DST to the
 808    next free point, and increment PRODUCED_CHARS.  The caller should
 809    assure that C is 0..127, and declare and set the variable `dst'
 810    appropriately in advance.
 811 */
 812
 813
 814 #define EMIT_ONE_ASCII_BYTE(c)  \
 815   do {                          \
 816     produced_chars++;           \
 817     *dst++ = (c);               \
 818   } while (0)
 819
 820
 821 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 822
 823 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 824   do {                                  \
 825     produced_chars += 2;                \
 826     *dst++ = (c1), *dst++ = (c2);       \
 827   } while (0)
 828
 829
 830 /* Store a byte C in the place pointed by DST and increment DST to the
 831    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 832    nonzero, store in an appropriate multibyte from.  The caller should
 833    declare and set the variables `dst' and `multibytep' appropriately
 834    in advance.  */
 835
 836 #define EMIT_ONE_BYTE(c)                \
 837   do {                                  \
 838     produced_chars++;                   \
 839     if (multibytep)                     \
 840       {                                 \
 841         int ch = (c);                   \
 842         if (ch >= 0x80)                 \
 843           ch = BYTE8_TO_CHAR (ch);      \
 844         CHAR_STRING_ADVANCE (ch, dst);  \
 845       }                                 \
 846     else                                \
 847       *dst++ = (c);                     \
 848   } while (0)
 849
 850
 851 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 852
 853 #define EMIT_TWO_BYTES(c1, c2)          \
 854   do {                                  \
 855     produced_chars += 2;                \
 856     if (multibytep)                     \
 857       {                                 \
 858         int ch;                         \
 859                                         \
 860         ch = (c1);                      \
 861         if (ch >= 0x80)                 \
 862           ch = BYTE8_TO_CHAR (ch);      \
 863         CHAR_STRING_ADVANCE (ch, dst);  \
 864         ch = (c2);                      \
 865         if (ch >= 0x80)                 \
 866           ch = BYTE8_TO_CHAR (ch);      \
 867         CHAR_STRING_ADVANCE (ch, dst);  \
 868       }                                 \
 869     else                                \
 870       {                                 \
 871         *dst++ = (c1);                  \
 872         *dst++ = (c2);                  \
 873       }                                 \
 874   } while (0)
 875
 876
 877 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 878   do {                                  \
 879     EMIT_ONE_BYTE (c1);                 \
 880     EMIT_TWO_BYTES (c2, c3);            \
 881   } while (0)
 882
 883
 884 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 885   do {                                          \
 886     EMIT_TWO_BYTES (c1, c2);                    \
 887     EMIT_TWO_BYTES (c3, c4);                    \
 888   } while (0)
 889
 890
 891 /* Prototypes for static functions.  */
 892 static void record_conversion_result (struct coding_system *coding,
 893                                       enum coding_result_code result);
 894 static int detect_coding_utf_8 (struct coding_system *,
 895                                 struct coding_detection_info *info);
 896 static void decode_coding_utf_8 (struct coding_system *);
 897 static int encode_coding_utf_8 (struct coding_system *);
 898
 899 static int detect_coding_utf_16 (struct coding_system *,
 900                                  struct coding_detection_info *info);
 901 static void decode_coding_utf_16 (struct coding_system *);
 902 static int encode_coding_utf_16 (struct coding_system *);
 903
 904 static int detect_coding_iso_2022 (struct coding_system *,
 905                                    struct coding_detection_info *info);
 906 static void decode_coding_iso_2022 (struct coding_system *);
 907 static int encode_coding_iso_2022 (struct coding_system *);
 908
 909 static int detect_coding_emacs_mule (struct coding_system *,
 910                                      struct coding_detection_info *info);
 911 static void decode_coding_emacs_mule (struct coding_system *);
 912 static int encode_coding_emacs_mule (struct coding_system *);
 913
 914 static int detect_coding_sjis (struct coding_system *,
 915                                struct coding_detection_info *info);
 916 static void decode_coding_sjis (struct coding_system *);
 917 static int encode_coding_sjis (struct coding_system *);
 918
 919 static int detect_coding_big5 (struct coding_system *,
 920                                struct coding_detection_info *info);
 921 static void decode_coding_big5 (struct coding_system *);
 922 static int encode_coding_big5 (struct coding_system *);
 923
 924 static int detect_coding_ccl (struct coding_system *,
 925                               struct coding_detection_info *info);
 926 static void decode_coding_ccl (struct coding_system *);
 927 static int encode_coding_ccl (struct coding_system *);
 928
 929 static void decode_coding_raw_text (struct coding_system *);
 930 static int encode_coding_raw_text (struct coding_system *);
 931
 932 static void coding_set_source (struct coding_system *);
 933 static void coding_set_destination (struct coding_system *);
 934 static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
 935 static void coding_alloc_by_making_gap (struct coding_system *,
 936                                         EMACS_INT, EMACS_INT);
 937 static unsigned char *alloc_destination (struct coding_system *,
 938                                          EMACS_INT, unsigned char *);
 939 static void setup_iso_safe_charsets (Lisp_Object);
 940 static unsigned char *encode_designation_at_bol (struct coding_system *,
 941                                                  int *, int *,
 942                                                  unsigned char *);
 943 static int detect_eol (const unsigned char *,
 944                        EMACS_INT, enum coding_category);
 945 static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
 946 static void decode_eol (struct coding_system *);
 947 static Lisp_Object get_translation_table (Lisp_Object, int, int *);
 948 static Lisp_Object get_translation (Lisp_Object, int *, int *);
 949 static int produce_chars (struct coding_system *, Lisp_Object, int);
 950 static INLINE void produce_charset (struct coding_system *, int *,
 951                                     EMACS_INT);
 952 static void produce_annotation (struct coding_system *, EMACS_INT);
 953 static int decode_coding (struct coding_system *);
 954 static INLINE int *handle_composition_annotation (EMACS_INT, EMACS_INT,
 955                                                   struct coding_system *,
 956                                                   int *, EMACS_INT *);
 957 static INLINE int *handle_charset_annotation (EMACS_INT, EMACS_INT,
 958                                               struct coding_system *,
 959                                               int *, EMACS_INT *);
 960 static void consume_chars (struct coding_system *, Lisp_Object, int);
 961 static int encode_coding (struct coding_system *);
 962 static Lisp_Object make_conversion_work_buffer (int);
 963 static Lisp_Object code_conversion_restore (Lisp_Object);
 964 static INLINE int char_encodable_p (int, Lisp_Object);
 965 static Lisp_Object make_subsidiaries (Lisp_Object);
 966
 967 static void
 968 record_conversion_result (struct coding_system *coding,
 969                           enum coding_result_code result)
 970 {
 971   coding->result = result;
 972   switch (result)
 973     {
 974     case CODING_RESULT_INSUFFICIENT_SRC:
 975       Vlast_code_conversion_error = Qinsufficient_source;
 976       break;
 977     case CODING_RESULT_INCONSISTENT_EOL:
 978       Vlast_code_conversion_error = Qinconsistent_eol;
 979       break;
 980     case CODING_RESULT_INVALID_SRC:
 981       Vlast_code_conversion_error = Qinvalid_source;
 982       break;
 983     case CODING_RESULT_INTERRUPT:
 984       Vlast_code_conversion_error = Qinterrupted;
 985       break;
 986     case CODING_RESULT_INSUFFICIENT_MEM:
 987       Vlast_code_conversion_error = Qinsufficient_memory;
 988       break;
 989     case CODING_RESULT_INSUFFICIENT_DST:
 990       /* Don't record this error in Vlast_code_conversion_error
 991          because it happens just temporarily and is resolved when the
 992          whole conversion is finished.  */
 993       break;
 994     case CODING_RESULT_SUCCESS:
 995       break;
 996     default:
 997       Vlast_code_conversion_error = intern ("Unknown error");
 998     }
 999 }
1000
/* Decode CODE in CHARSET into C with DECODE_CHAR while keeping the
   pointers SRC, SRC_BASE and SRC_END valid.  Decoding may load a
   charset map; loading a charset map allocates large structures and
   so can relocate buffer text.  When charset_map_loaded reports that
   a map was loaded, CODING->source is recomputed with
   coding_set_source and the three pointers are shifted by the
   distance it moved.  */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    c = DECODE_CHAR (charset, code);                                         \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *old_source = coding->source;                    \
        EMACS_INT moved_by;                                                  \
                                                                             \
        coding_set_source (coding);                                          \
        moved_by = coding->source - old_source;                              \
        src += moved_by;                                                     \
        src_base += moved_by;                                                \
        src_end += moved_by;                                                 \
      }                                                                      \
  } while (0)
1021
1022
/* Unless more than BYTES bytes of room remain between dst and
   dst_end, allocate more memory for coding->destination and update
   dst and dst_end.  The amount requested is BYTES plus the number of
   elements still pending between charbuf and charbuf_end.  We don't
   have to take care of coding->source which will be relocated.  It
   is handled by calling coding_set_source in encode_coding.

   The caller must have declared and set up these variables:
        coding, dst, dst_end, charbuf, charbuf_end  */

#define ASSURE_DESTINATION(bytes)                                       \
  do {                                                                  \
    if (dst + (bytes) >= dst_end)                                       \
      {                                                                 \
        /* Keep the full width that alloc_destination accepts.  */      \
        EMACS_INT more_bytes = charbuf_end - charbuf + (bytes);         \
                                                                        \
        dst = alloc_destination (coding, more_bytes, dst);              \
        dst_end = coding->destination + coding->dst_bytes;              \
      }                                                                 \
  } while (0)
1038
1039
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   Characters up to MAX_5_BYTE_CHAR are written as one to five bytes;
   anything above that is handed to BYTE8_STRING after subtracting
   0x3FFF80.  C and P are evaluated more than once, so neither may
   have side effects.  Every use of C is parenthesized so that an
   expression argument such as `a | b' is encoded correctly.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1069
1070
/* Yield the character code of the multibyte form that starts at P,
   and move P just past that form.  This is like STRING_CHAR_ADVANCE,
   but it never calls MAYBE_UNIFY_CHAR.  The length of the form (one
   to five bytes) is chosen from the high bits of the first byte.  P
   is evaluated several times, so it must not have side effects.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                                 \
  (((p)[0] & 0x80) == 0                                                 \
   /* One byte.  */                                                     \
   ? *(p)++                                                             \
   : ((p)[0] & 0x20) == 0                                               \
   /* Two bytes; a lead byte below 0xC2 has 0x3FFF80 or'ed in.  */      \
   ? ((p) += 2,                                                         \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)                \
       | (((p)[-2] & 0x1F) << 6)                                        \
       | ((p)[-1] & 0x3F)))                                             \
   : ((p)[0] & 0x10) == 0                                               \
   /* Three bytes.  */                                                  \
   ? ((p) += 3,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x0F) << 12)))                                     \
   : ((p)[0] & 0x08) == 0                                               \
   /* Four bytes.  */                                                   \
   ? ((p) += 4,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0xF) << 18)))                                      \
   /* Five bytes; the lead byte itself carries no payload.  */          \
   : ((p) += 5,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0x3F) << 18))))
1099
1100
1101 static void
1102 coding_set_source (struct coding_system *coding)
1103 {
1104   if (BUFFERP (coding->src_object))
1105     {
1106       struct buffer *buf = XBUFFER (coding->src_object);
1107
1108       if (coding->src_pos < 0)
1109         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1110       else
1111         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1112     }
1113   else if (STRINGP (coding->src_object))
1114     {
1115       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1116     }
1117   else
1118     /* Otherwise, the source is C string and is never relocated
1119        automatically.  Thus we don't have to update anything.  */
1120     ;
1121 }
1122
1123 static void
1124 coding_set_destination (struct coding_system *coding)
1125 {
1126   if (BUFFERP (coding->dst_object))
1127     {
1128       if (coding->src_pos < 0)
1129         {
1130           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1131           coding->dst_bytes = (GAP_END_ADDR
1132                                - (coding->src_bytes - coding->consumed)
1133                                - coding->destination);
1134         }
1135       else
1136         {
1137           /* We are sure that coding->dst_pos_byte is before the gap
1138              of the buffer. */
1139           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1140                                  + coding->dst_pos_byte - BEG_BYTE);
1141           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1142                                - coding->destination);
1143         }
1144     }
1145   else
1146     /* Otherwise, the destination is C string and is never relocated
1147        automatically.  Thus we don't have to update anything.  */
1148     ;
1149 }
1150
1151
1152 static void
1153 coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
1154 {
1155   coding->destination = (unsigned char *) xrealloc (coding->destination,
1156                                                     coding->dst_bytes + bytes);
1157   coding->dst_bytes += bytes;
1158 }
1159
/* Call make_gap (BYTES) on the buffer that is CODING->dst_object.
   GAP_HEAD_USED is the number of bytes at the head of the gap that
   are already occupied (alloc_destination passes the distance from
   the gap start to the current write position).  It is only used
   when source and destination are the same object.  */
static void
coding_alloc_by_making_gap (struct coding_system *coding,
                            EMACS_INT gap_head_used, EMACS_INT bytes)
{
  if (EQ (coding->src_object, coding->dst_object))
    {
      /* The gap may contain the produced data at the head and not-yet
         consumed data at the tail.  To preserve those data, we at
         first make the gap size to zero, then increase the gap
         size.  */
      EMACS_INT add = GAP_SIZE;

      /* Temporarily describe the buffer as if the old gap were text:
         the gap start moves past the produced data and the end
         positions grow by the old gap size.  */
      GPT += gap_head_used, GPT_BYTE += gap_head_used;
      GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
      make_gap (bytes);
      /* Undo the temporary bookkeeping, keeping whatever make_gap
         added on top of the old gap size.  */
      GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
      GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
    }
  else
    {
      Lisp_Object this_buffer;

      /* make_gap is called with the destination buffer made current
         (presumably it acts on the current buffer -- confirm), after
         which the previously current buffer is restored.  */
      this_buffer = Fcurrent_buffer ();
      set_buffer_internal (XBUFFER (coding->dst_object));
      make_gap (bytes);
      set_buffer_internal (XBUFFER (this_buffer));
    }
}
1188
1189
1190 static unsigned char *
1191 alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1192                    unsigned char *dst)
1193 {
1194   EMACS_INT offset = dst - coding->destination;
1195
1196   if (BUFFERP (coding->dst_object))
1197     {
1198       struct buffer *buf = XBUFFER (coding->dst_object);
1199
1200       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1201     }
1202   else
1203     coding_alloc_by_realloc (coding, nbytes);
1204   coding_set_destination (coding);
1205   dst = coding->destination + offset;
1206   return dst;
1207 }
1208
1209 /** Macros for annotations.  */
1210
1211 /* An annotation data is stored in the array coding->charbuf in this
1212    format:
1213      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1214    LENGTH is the number of elements in the annotation.
1215    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1216    NCHARS is the number of characters in the text annotated.
1217
1218    The format of the following elements depend on ANNOTATION_MASK.
1219
1220    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
1222      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1223
1224    NBYTES is the number of bytes specified in the header part of
1225    old-style emacs-mule encoding, or 0 for the other kind of
1226    composition.
1227
1228    METHOD is one of enum composition_method.
1229
   Optional COMPOSITION-COMPONENTS are characters and composition
1231    rules.
1232
1233    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1234    follows.
1235
1236    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1237    recover from an invalid annotation, and should be skipped by
1238    produce_annotation.  */
1239
1240 /* Maximum length of the header of annotation data.  */
1241 #define MAX_ANNOTATION_LENGTH 5
1242
/* Append the three-element annotation header [ -LEN MASK NCHARS ] at
   BUF, advancing BUF past it, and mark `coding' as annotated.  The
   caller must have a variable `coding' in scope.  The expansion has
   no trailing semicolon, so the macro can be used as a single
   statement, including before an `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1250
/* Append a composition annotation at BUF, advancing BUF past it: the
   common header (length 5, CODING_ANNOTATE_COMPOSITION_MASK, NCHARS)
   followed by NBYTES and METHOD.  Arguments are parenthesized so
   that BUF may be any lvalue expression, e.g. `*pp'.  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes);                                                    \
    *(buf)++ = (method);                                                    \
  } while (0)
1257
1258
/* Append a charset annotation at BUF, advancing BUF past it: the
   common header (length 4, CODING_ANNOTATE_CHARSET_MASK, NCHARS)
   followed by the charset ID.  Arguments are parenthesized so that
   BUF may be any lvalue expression, e.g. `*pp'.  */
#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1264
1265 \f
1266 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1267
1268
1269
1270 \f
1271 /*** 3. UTF-8 ***/
1272
1273 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1274    Check if a text is encoded in UTF-8.  If it is, return 1, else
1275    return 0.  */
1276
/* Predicates that classify one octet C of a UTF-8 sequence by its
   high bits.  */

/* C is below 0x80, i.e. a complete one-octet sequence.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* C has the bit pattern 10xxxxxx of a trailing octet.  */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
/* C has the bit pattern 110xxxxx (leads a 2-octet sequence).  */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* C has the bit pattern 1110xxxx (leads a 3-octet sequence).  */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* C has the bit pattern 11110xxx (leads a 4-octet sequence).  */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* C has the bit pattern 111110xx (leads a 5-octet sequence).  */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)

/* The byte order mark character, and the three octets that encode it
   in UTF-8.  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1288
/* Scan the source of CODING, past its leading CODING->head_ascii
   bytes, and decide whether it can be UTF-8.  Return 0 and mark
   CATEGORY_MASK_UTF_8 as rejected in DETECT_INFO when a malformed
   sequence is seen, or when the last block ends in the middle of a
   sequence.  Otherwise return 1 after recording in DETECT_INFO
   whether the signature (BOM) and no-signature variants were found
   or rejected.  ONE_MORE_BYTE jumps to `no_more_source' when the
   source is exhausted.  */
static int
detect_coding_utf_8 (struct coding_system *coding,
                     struct coding_detection_info *detect_info)
{
  const unsigned char *src = coding->source, *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  int bom_found = 0;
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_UTF_8;
  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  while (1)
    {
      int c, c1, c2, c3, c4;

      /* SRC_BASE marks the start of the sequence being examined.  */
      src_base = src;
      ONE_MORE_BYTE (c);
      /* A negative value means ONE_MORE_BYTE met a multibyte
         character in a multibyte source; as a first octet it is
         skipped, just like a one-octet sequence.  */
      if (c < 0 || UTF_8_1_OCTET_P (c))
        continue;
      ONE_MORE_BYTE (c1);
      if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
        break;
      if (UTF_8_2_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c2);
      if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
        break;
      if (UTF_8_3_OCTET_LEADING_P (c))
        {
          found = 1;
          /* Only a sequence at the very beginning of the source can
             be a signature.  */
          if (src_base == coding->source
              && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
            bom_found = 1;
          continue;
        }
      ONE_MORE_BYTE (c3);
      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
        break;
      if (UTF_8_4_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c4);
      if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
        break;
      if (UTF_8_5_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      break;
    }
  /* The loop is left with `break' only on a malformed sequence.  */
  detect_info->rejected |= CATEGORY_MASK_UTF_8;
  return 0;

 no_more_source:
  /* Running out of source in the middle of a sequence is fatal only
     if no further block will follow.  */
  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8;
      return 0;
    }
  if (bom_found)
    {
      /* A leading U+FEFF doesn't necessarily mean a BOM, so the
         variant without signature stays possible too.  */
      detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
    }
  else
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
      if (found)
        detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
    }
  return 1;
}
1371
1372
1373 static void
1374 decode_coding_utf_8 (struct coding_system *coding)
1375 {
1376   const unsigned char *src = coding->source + coding->consumed;
1377   const unsigned char *src_end = coding->source + coding->src_bytes;
1378   const unsigned char *src_base;
1379   int *charbuf = coding->charbuf + coding->charbuf_used;
1380   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1381   int consumed_chars = 0, consumed_chars_base = 0;
1382   int multibytep = coding->src_multibyte;
1383   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1384   Lisp_Object attr, charset_list;
1385   int eol_crlf =
1386     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1387   int byte_after_cr = -1;
1388
1389   CODING_GET_INFO (coding, attr, charset_list);
1390
1391   if (bom != utf_without_bom)
1392     {
1393       int c1, c2, c3;
1394
1395       src_base = src;
1396       ONE_MORE_BYTE (c1);
1397       if (! UTF_8_3_OCTET_LEADING_P (c1))
1398         src = src_base;
1399       else
1400         {
1401           ONE_MORE_BYTE (c2);
1402           if (! UTF_8_EXTRA_OCTET_P (c2))
1403             src = src_base;
1404           else
1405             {
1406               ONE_MORE_BYTE (c3);
1407               if (! UTF_8_EXTRA_OCTET_P (c3))
1408                 src = src_base;
1409               else
1410                 {
1411                   if ((c1 != UTF_8_BOM_1)
1412                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1413                     src = src_base;
1414                   else
1415                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1416                 }
1417             }
1418         }
1419     }
1420   CODING_UTF_8_BOM (coding) = utf_without_bom;
1421
1422   while (1)
1423     {
1424       int c, c1, c2, c3, c4, c5;
1425
1426       src_base = src;
1427       consumed_chars_base = consumed_chars;
1428
1429       if (charbuf >= charbuf_end)
1430         {
1431           if (byte_after_cr >= 0)
1432             src_base--;
1433           break;
1434         }
1435
1436       if (byte_after_cr >= 0)
1437         c1 = byte_after_cr, byte_after_cr = -1;
1438       else
1439         ONE_MORE_BYTE (c1);
1440       if (c1 < 0)
1441         {
1442           c = - c1;
1443         }
1444       else if (UTF_8_1_OCTET_P (c1))
1445         {
1446           if (eol_crlf && c1 == '\r')
1447             ONE_MORE_BYTE (byte_after_cr);
1448           c = c1;
1449         }
1450       else
1451         {
1452           ONE_MORE_BYTE (c2);
1453           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1454             goto invalid_code;
1455           if (UTF_8_2_OCTET_LEADING_P (c1))
1456             {
1457               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1458               /* Reject overlong sequences here and below.  Encoders
1459                  producing them are incorrect, they can be misleading,
1460                  and they mess up read/write invariance.  */
1461               if (c < 128)
1462                 goto invalid_code;
1463             }
1464           else
1465             {
1466               ONE_MORE_BYTE (c3);
1467               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1468                 goto invalid_code;
1469               if (UTF_8_3_OCTET_LEADING_P (c1))
1470                 {
1471                   c = (((c1 & 0xF) << 12)
1472                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1473                   if (c < 0x800
1474                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1475                     goto invalid_code;
1476                 }
1477               else
1478                 {
1479                   ONE_MORE_BYTE (c4);
1480                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1481                     goto invalid_code;
1482                   if (UTF_8_4_OCTET_LEADING_P (c1))
1483                     {
1484                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1485                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1486                     if (c < 0x10000)
1487                       goto invalid_code;
1488                     }
1489                   else
1490                     {
1491                       ONE_MORE_BYTE (c5);
1492                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1493                         goto invalid_code;
1494                       if (UTF_8_5_OCTET_LEADING_P (c1))
1495                         {
1496                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1497                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1498                                | (c5 & 0x3F));
1499                           if ((c > MAX_CHAR) || (c < 0x200000))
1500                             goto invalid_code;
1501                         }
1502                       else
1503                         goto invalid_code;
1504                     }
1505                 }
1506             }
1507         }
1508
1509       *charbuf++ = c;
1510       continue;
1511
1512     invalid_code:
1513       src = src_base;
1514       consumed_chars = consumed_chars_base;
1515       ONE_MORE_BYTE (c);
1516       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1517       coding->errors++;
1518     }
1519
1520  no_more_source:
1521   coding->consumed_char += consumed_chars_base;
1522   coding->consumed = src_base - coding->source;
1523   coding->charbuf_used = charbuf - coding->charbuf;
1524 }
1525
1526
/* Encode the characters held in CODING->charbuf (CODING->charbuf_used
   entries) as UTF-8, writing the bytes to CODING->destination
   starting at offset CODING->produced.

   If the coding system is marked utf_with_bom, the three BOM bytes
   are emitted first and the mark is reset to utf_without_bom so that
   the BOM is written only once.  A character for which CHAR_BYTE8_P
   holds is written as the single byte given by CHAR_TO_BYTE8; every
   other character is written by CHAR_STRING_ADVANCE_NO_UNIFY.

   On normal completion, record CODING_RESULT_SUCCESS, add the number
   of produced characters to CODING->produced_char, set
   CODING->produced to the new end of output, and return 0.

   NOTE(review): ASSURE_DESTINATION and the EMIT_* macros are defined
   outside this view and expand in terms of this function's local
   names; presumably ASSURE_DESTINATION guarantees room in the
   destination before each write -- confirm before renaming or
   reordering anything here.  */
1527 static int
1528 encode_coding_utf_8 (struct coding_system *coding)
1529 {
1530   int multibytep = coding->dst_multibyte;
1531   int *charbuf = coding->charbuf;
1532   int *charbuf_end = charbuf + coding->charbuf_used;
1533   unsigned char *dst = coding->destination + coding->produced;
1534   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1535   int produced_chars = 0;
1536   int c;
1537
       /* Emit the BOM once, then clear the request so that later calls
          on the same coding system do not repeat it.  */
1538   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1539     {
1540       ASSURE_DESTINATION (3);
1541       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1542       CODING_UTF_8_BOM (coding) = utf_without_bom;
1543     }
1544
1545   if (multibytep)
1546     {
           /* Multibyte destination: each character is first written to
              the scratch buffer STR and its bytes are then emitted one
              at a time.  NOTE(review): produced_chars is not updated
              by the visible code of this branch; presumably
              EMIT_ONE_BYTE maintains it -- confirm.  */
1547       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1548
1549       while (charbuf < charbuf_end)
1550         {
1551           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1552
1553           ASSURE_DESTINATION (safe_room);
1554           c = *charbuf++;
1555           if (CHAR_BYTE8_P (c))
1556             {
1557               c = CHAR_TO_BYTE8 (c);
1558               EMIT_ONE_BYTE (c);
1559             }
1560           else
1561             {
1562               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1563               for (p = str; p < pend; p++)
1564                 EMIT_ONE_BYTE (*p);
1565             }
1566         }
1567     }
1568   else
1569     {
           /* Unibyte destination: write straight through DST and count
              one produced character per source character.  */
1570       int safe_room = MAX_MULTIBYTE_LENGTH;
1571
1572       while (charbuf < charbuf_end)
1573         {
1574           ASSURE_DESTINATION (safe_room);
1575           c = *charbuf++;
1576           if (CHAR_BYTE8_P (c))
1577             *dst++ = CHAR_TO_BYTE8 (c);
1578           else
1579             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1580           produced_chars++;
1581         }
1582     }
1583   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1584   coding->produced_char += produced_chars;
1585   coding->produced = dst - coding->destination;
1586   return 0;
1587 }
1588
1589
1590 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1591    Check if a text is encoded in one of UTF-16 based coding systems.
1592    If it is, return 1, else return 0.  */
1593
/* Nonzero if the 16-bit code unit VAL is a UTF-16 high surrogate,
   i.e. its top six bits match 0xD800 (range 0xD800..0xDBFF).  */
1594 #define UTF_16_HIGH_SURROGATE_P(val) \
1595   (((val) & 0xFC00) == 0xD800)
1596
/* Nonzero if the 16-bit code unit VAL is a UTF-16 low surrogate,
   i.e. its top six bits match 0xDC00 (range 0xDC00..0xDFFF).  */
1597 #define UTF_16_LOW_SURROGATE_P(val) \
1598   (((val) & 0xFC00) == 0xDC00)
1599
/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate as judged by
   UTF_16_LOW_SURROGATE_P.  VAL is evaluated more than once.  */
1600 #define UTF_16_INVALID_P(val)   \
1601   (((val) == 0xFFFE)            \
1602    || ((val) == 0xFFFF)         \
1603    || UTF_16_LOW_SURROGATE_P (val))
1604
1605
/* Examine the source bytes of CODING for signs of UTF-16 and record
   the verdict in DETECT_INFO (the `checked', `found' and `rejected'
   category masks).

   - If this is the last block and CODING->src_chars is odd, every
     UTF-16 category is rejected and 0 is returned.
   - If the first two bytes are FF FE or FE FF, the matching
     little-/big-endian category and the AUTO category are marked as
     found, the other signature category and both NOSIG categories
     are rejected, and 1 is returned.
   - If either of the first two values is negative, every UTF-16
     category is rejected and 0 is returned.
   - Otherwise the signature-based categories are rejected and the
     bytes are scanned pairwise, counting distinct values seen at
     even and at odd positions.  128 or more distinct even-position
     values reject BE_NOSIG, 128 or more distinct odd-position
     values reject LE_NOSIG; 0 is returned when the scan stops.

   The `no_more_source' label returns 1.  NOTE(review): TWO_MORE_BYTES
   is defined outside this view; presumably it jumps to that label
   when fewer than two bytes remain -- confirm.  */
1606 static int
1607 detect_coding_utf_16 (struct coding_system *coding,
1608                       struct coding_detection_info *detect_info)
1609 {
1610   const unsigned char *src = coding->source, *src_base = src;
1611   const unsigned char *src_end = coding->source + coding->src_bytes;
1612   int multibytep = coding->src_multibyte;
1613   int consumed_chars = 0;
1614   int c1, c2;
1615
1616   detect_info->checked |= CATEGORY_MASK_UTF_16;
1617   if (coding->mode & CODING_MODE_LAST_BLOCK
1618       && (coding->src_chars & 1))
1619     {
1620       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1621       return 0;
1622     }
1623
1624   TWO_MORE_BYTES (c1, c2);
1625   if ((c1 == 0xFF) && (c2 == 0xFE))
1626     {
1627       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1628                              | CATEGORY_MASK_UTF_16_AUTO);
1629       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1630                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1631                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1632     }
1633   else if ((c1 == 0xFE) && (c2 == 0xFF))
1634     {
1635       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1636                              | CATEGORY_MASK_UTF_16_AUTO);
1637       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1638                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1639                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1640     }
       /* Both values index the 256-entry tables below, so neither may
          be negative.  NOTE(review): if TWO_MORE_BYTES can never yield
          a negative C1, the C1 test is merely redundant.  */
1641   else if (c1 < 0 || c2 < 0)
1642     {
1643       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1644       return 0;
1645     }
1646   else
1647     {
1648       /* We check the dispersion of Eth and Oth bytes where E is even and
1649          O is odd.  If both are high, we assume binary data.  */
1650       unsigned char e[256], o[256];
           /* The first pair read above is already counted.  */
1651       unsigned e_num = 1, o_num = 1;
1652
1653       memset (e, 0, 256);
1654       memset (o, 0, 256);
1655       e[c1] = 1;
1656       o[c2] = 1;
1657
           /* No BOM was seen, so only the NOSIG categories remain
              candidates.  */
1658       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1659                                 | CATEGORY_MASK_UTF_16_BE
1660                                 | CATEGORY_MASK_UTF_16_LE);
1661
1662       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1663              != CATEGORY_MASK_UTF_16)
1664         {
1665           TWO_MORE_BYTES (c1, c2);
               /* Same guard as above: stop before using a negative
                  value as a table index.  */
1666           if (c1 < 0 || c2 < 0)
1667             break;
1668           if (! e[c1])
1669             {
1670               e[c1] = 1;
1671               e_num++;
1672               if (e_num >= 128)
1673                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1674             }
1675           if (! o[c2])
1676             {
1677               o[c2] = 1;
1678               o_num++;
1679               if (o_num >= 128)
1680                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1681             }
1682         }
1683       return 0;
1684     }
1685
1686  no_more_source:
1687   return 1;
1688 }
1689
/* Decode the UTF-16 bytes of CODING->source, starting at offset
   CODING->consumed, into character codes appended to CODING->charbuf
   after its first CODING->charbuf_used entries.

   Byte order is taken from CODING_UTF_16_ENDIAN.  A high surrogate
   whose partner has not been read yet is kept in
   CODING_UTF_16_SURROGATE, so a pair may span two calls.  When the
   coding system is marked utf_with_bom, the first two bytes are
   checked against the BOM for the configured byte order; the mark is
   then reset to utf_without_bom.

   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.  The consumed position is the one
   saved at the start of the last loop iteration (SRC_BASE).

   NOTE(review): ONE_MORE_BYTE is defined outside this view and uses
   this function's local names; presumably it jumps to
   `no_more_source' when the input is exhausted and may yield a
   negative value for a non-byte character in multibyte source --
   confirm.  */
1690 static void
1691 decode_coding_utf_16 (struct coding_system *coding)
1692 {
1693   const unsigned char *src = coding->source + coding->consumed;
1694   const unsigned char *src_end = coding->source + coding->src_bytes;
1695   const unsigned char *src_base;
1696   int *charbuf = coding->charbuf + coding->charbuf_used;
1697   /* We may produce at most 3 chars in one loop.  */
1698   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1699   int consumed_chars = 0, consumed_chars_base = 0;
1700   int multibytep = coding->src_multibyte;
1701   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1702   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1703   int surrogate = CODING_UTF_16_SURROGATE (coding);
1704   Lisp_Object attr, charset_list;
1705   int eol_crlf =
1706     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The two bytes read ahead after a CR, or -1 when none are
          pending.  */
1707   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1708
1709   CODING_GET_INFO (coding, attr, charset_list);
1710
1711   if (bom == utf_with_bom)
1712     {
1713       int c, c1, c2;
1714
1715       src_base = src;
1716       ONE_MORE_BYTE (c1);
1717       ONE_MORE_BYTE (c2);
           /* C is the pair read in source order: FE FF is expected for
              big endian, FF FE otherwise.  */
1718       c = (c1 << 8) | c2;
1719
1720       if (endian == utf_16_big_endian
1721           ? c != 0xFEFF : c != 0xFFFE)
1722         {
1723           /* The first two bytes are not BOM.  Treat them as bytes
1724              for a normal character.  */
1725           src = src_base;
1726           coding->errors++;
1727         }
1728       CODING_UTF_16_BOM (coding) = utf_without_bom;
1729     }
1730   else if (bom == utf_detect_bom)
1731     {
1732       /* We have already tried to detect BOM and failed in
1733          detect_coding.  */
1734       CODING_UTF_16_BOM (coding) = utf_without_bom;
1735     }
1736
1737   while (1)
1738     {
1739       int c, c1, c2;
1740
           /* Remember where this iteration started; this is the
              position reported as consumed on exit.  */
1741       src_base = src;
1742       consumed_chars_base = consumed_chars;
1743
1744       if (charbuf >= charbuf_end)
1745         {
               /* A pair read ahead after a CR has not been decoded
                  yet, so do not report its two bytes as consumed.  */
1746           if (byte_after_cr1 >= 0)
1747             src_base -= 2;
1748           break;
1749         }
1750
           /* Use the bytes read ahead after a CR, if any, before
              reading fresh ones.  */
1751       if (byte_after_cr1 >= 0)
1752         c1 = byte_after_cr1, byte_after_cr1 = -1;
1753       else
1754         ONE_MORE_BYTE (c1);
1755       if (c1 < 0)
1756         {
               /* A negative value is stored negated, without waiting
                  for a second byte.  */
1757           *charbuf++ = -c1;
1758           continue;
1759         }
1760       if (byte_after_cr2 >= 0)
1761         c2 = byte_after_cr2, byte_after_cr2 = -1;
1762       else
1763         ONE_MORE_BYTE (c2);
1764       if (c2 < 0)
1765         {
               /* C1 has no partner byte: store it alone (as a raw
                  8-bit character when not ASCII), then the negated
                  C2.  */
1766           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1767           *charbuf++ = -c2;
1768           continue;
1769         }
1770       c = (endian == utf_16_big_endian
1771            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1772
1773       if (surrogate)
1774         {
               /* A high surrogate is pending from an earlier unit.  */
1775           if (! UTF_16_LOW_SURROGATE_P (c))
1776             {
                   /* No low surrogate followed: split the pending
                      high surrogate back into its two bytes in source
                      order, store both, and count an error.  */
1777               if (endian == utf_16_big_endian)
1778                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1779               else
1780                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1781               *charbuf++ = c1;
1782               *charbuf++ = c2;
1783               coding->errors++;
                   /* The current unit either becomes the new pending
                      high surrogate or is stored as is.  */
1784               if (UTF_16_HIGH_SURROGATE_P (c))
1785                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1786               else
1787                 *charbuf++ = c;
1788             }
1789           else
1790             {
                   /* Combine the pair into one code point at or above
                      0x10000 and clear the pending state.  */
1791               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1792               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1793               *charbuf++ = 0x10000 + c;
1794             }
1795         }
1796       else
1797         {
1798           if (UTF_16_HIGH_SURROGATE_P (c))
1799             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1800           else
1801             {
1802               if (eol_crlf && c == '\r')
1803                 {
                       /* Read the next two bytes ahead before storing
                          the CR; they are consumed by the next
                          iteration.  NOTE(review): presumably this
                          keeps a CR from being decoded until the unit
                          after it is available -- confirm.  */
1804                   ONE_MORE_BYTE (byte_after_cr1);
1805                   ONE_MORE_BYTE (byte_after_cr2);
1806                 }
1807               *charbuf++ = c;
1808             }
1809         }
1810     }
1811
1812  no_more_source:
1813   coding->consumed_char += consumed_chars_base;
1814   coding->consumed = src_base - coding->source;
1815   coding->charbuf_used = charbuf - coding->charbuf;
1816 }
1817
/* Encode the characters held in CODING->charbuf (CODING->charbuf_used
   entries) as UTF-16, writing the bytes to CODING->destination
   starting at offset CODING->produced.

   Unless the coding system is marked utf_without_bom, a BOM is
   emitted first (FE FF for big endian, FF FE otherwise) and the mark
   is then set to utf_without_bom.  A character above
   MAX_UNICODE_CHAR is replaced by CODING->default_char.  A character
   below 0x10000 is written as one 16-bit unit; any other character
   is written as a high/low surrogate pair.

   On normal completion, record CODING_RESULT_SUCCESS, update
   CODING->produced and CODING->produced_char, and return 0.

   NOTE(review): ASSURE_DESTINATION and the EMIT_* macros are defined
   outside this view and use this function's local names;
   produced_chars is not updated by any visible statement, so
   presumably the EMIT_* macros maintain it -- confirm.  */
1818 static int
1819 encode_coding_utf_16 (struct coding_system *coding)
1820 {
1821   int multibytep = coding->dst_multibyte;
1822   int *charbuf = coding->charbuf;
1823   int *charbuf_end = charbuf + coding->charbuf_used;
1824   unsigned char *dst = coding->destination + coding->produced;
1825   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Room requested from ASSURE_DESTINATION before each write.  */
1826   int safe_room = 8;
1827   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1828   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1829   int produced_chars = 0;
1830   Lisp_Object attrs, charset_list;
1831   int c;
1832
1833   CODING_GET_INFO (coding, attrs, charset_list);
1834
       /* Emit the BOM once, in the configured byte order.  */
1835   if (bom != utf_without_bom)
1836     {
1837       ASSURE_DESTINATION (safe_room);
1838       if (big_endian)
1839         EMIT_TWO_BYTES (0xFE, 0xFF);
1840       else
1841         EMIT_TWO_BYTES (0xFF, 0xFE);
1842       CODING_UTF_16_BOM (coding) = utf_without_bom;
1843     }
1844
1845   while (charbuf < charbuf_end)
1846     {
1847       ASSURE_DESTINATION (safe_room);
1848       c = *charbuf++;
           /* Anything above MAX_UNICODE_CHAR is replaced by the
              coding system's default character.  */
1849       if (c > MAX_UNICODE_CHAR)
1850         c = coding->default_char;
1851
1852       if (c < 0x10000)
1853         {
1854           if (big_endian)
1855             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1856           else
1857             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1858         }
1859       else
1860         {
               /* Split into a surrogate pair: C1 is the high
                  surrogate, C2 the low surrogate.  */
1861           int c1, c2;
1862
1863           c -= 0x10000;
1864           c1 = (c >> 10) + 0xD800;
1865           c2 = (c & 0x3FF) + 0xDC00;
1866           if (big_endian)
1867             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1868           else
1869             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1870         }
1871     }
1872   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1873   coding->produced = dst - coding->destination;
1874   coding->produced_char += produced_chars;
1875   return 0;
1876 }
1877
1878 \f
1879 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1880
1881 /* Emacs' internal format for representation of multiple character
1882    sets is a kind of multi-byte encoding, i.e. characters are
1883    represented by variable-length sequences of one-byte codes.
1884
1885    ASCII characters and control characters (e.g. `tab', `newline') are
1886    represented by one-byte sequences which are their ASCII codes, in
1887    the range 0x00 through 0x7F.
1888
1889    8-bit characters of the range 0x80..0x9F are represented by
1890    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1891    code + 0x20).
1892
1893    8-bit characters of the range 0xA0..0xFF are represented by
1894    one-byte sequences which are their 8-bit code.
1895
1896    The other characters are represented by a sequence of `base
1897    leading-code', optional `extended leading-code', and one or two
1898    `position-code's.  The length of the sequence is determined by the
1899    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1900    whereas extended leading-code and position-code take the range 0xA0
1901    through 0xFF.  See `charset.h' for more details about leading-code
1902    and position-code.
1903
1904    --- CODE RANGE of Emacs' internal format ---
1905    character set        range
1906    -------------        -----
1907    ascii                0x00..0x7F
1908    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1909    eight-bit-graphic    0xA0..0xFF
1910    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1911    ---------------------------------------------
1912
1913    As this is the internal character representation, the format is
1914    usually not used externally (i.e. in a file or in data sent to a
1915    process).  However, it is possible to have text externally in this
1916    format (i.e. by encoding it with the coding system `emacs-mule').
1917
1918    In that case, a sequence of one-byte codes has a slightly different
1919    form.
1920
1921    At first, all characters in eight-bit-control are represented by
1922    one-byte sequences which are their 8-bit code.
1923
1924    Next, character composition data are represented by the byte
1925    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1926    where,
1927         METHOD is 0xF2 plus one of composition method (enum
1928         composition_method),
1929
1930         BYTES is 0xA0 plus a byte length of this composition data,
1931
1932         CHARS is 0xA0 plus a number of characters composed by this
1933         data,
1934
1935         COMPONENTs are characters of multibyte form or composition
1936         rules encoded by two bytes of ASCII codes.
1937
1938    In addition, for backward compatibility, the following formats are
1939    also recognized as composition data on decoding.
1940
1941    0x80 MSEQ ...
1942    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1943
1944    Here,
1945         MSEQ is a multibyte form but in one of these special formats:
1946           ASCII: 0xA0 ASCII_CODE+0x80,
1947           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1948         RULE is a one byte code of the range 0xA0..0xF0 that
1949         represents a composition rule.
1950   */
1951
/* Table indexed by a byte value.  Where it is used in the functions
   below, the index is the first byte of an emacs-mule sequence and
   the entry is the total byte length of that sequence;
   emacs_mule_char handles the values 1 through 4 and aborts on
   anything else.  NOTE(review): the table is filled in outside this
   view.  */
1952 char emacs_mule_bytes[256];
1953
1954
1955 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1956    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1957    else return 0.

        The scan starts after the first CODING->head_ascii bytes.  It
        stops and rejects CATEGORY_MASK_EMACS_MULE (returning 0) when it
        meets an ESC, SI or SO byte, a composition run that is too
        short, or a leading byte that is not followed by the number of
        trailing bytes (each >= 0xA0) that emacs_mule_bytes calls for.

        When the source runs out, the category is rejected only if this
        is the last block and bytes remain after the start of the
        current sequence; otherwise 1 is returned, and the category is
        marked as found if at least one multibyte sequence or
        composition was seen.

        NOTE(review): ONE_MORE_BYTE is defined outside this view;
        presumably it jumps to `no_more_source' at end of input and may
        refer to local names such as src_base -- confirm before
        renaming anything.  */
1958
1959 static int
1960 detect_coding_emacs_mule (struct coding_system *coding,
1961                           struct coding_detection_info *detect_info)
1962 {
1963   const unsigned char *src = coding->source, *src_base;
1964   const unsigned char *src_end = coding->source + coding->src_bytes;
1965   int multibytep = coding->src_multibyte;
1966   int consumed_chars = 0;
1967   int c;
1968   int found = 0;
1969
1970   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1971   /* A coding system of this category is always ASCII compatible.  */
1972   src += coding->head_ascii;
1973
1974   while (1)
1975     {
           /* Start of the sequence examined in this iteration.  */
1976       src_base = src;
1977       ONE_MORE_BYTE (c);
1978       if (c < 0)
1979         continue;
1980       if (c == 0x80)
1981         {
1982           /* Perhaps the start of composite character.  We simply skip
1983              it because analyzing it is too heavy for detecting.  But,
1984              at least, we check that the composite character
1985              consists of more than 4 bytes.  */
               /* NOTE(review): this declaration shadows the
                  function-level src_base for the rest of this block;
                  the `no_more_source' code below still sees the outer
                  one.  */
1986           const unsigned char *src_base;
1987
1988         repeat:
1989           src_base = src;
               /* Skip bytes >= 0xA0; C ends up holding the first byte
                  below 0xA0.  */
1990           do
1991             {
1992               ONE_MORE_BYTE (c);
1993             }
1994           while (c >= 0xA0);
1995
               /* The count includes the terminating byte just read.  */
1996           if (src - src_base <= 4)
1997             break;
1998           found = CATEGORY_MASK_EMACS_MULE;
               /* Another composition follows immediately.  */
1999           if (c == 0x80)
2000             goto repeat;
2001         }
2002
2003       if (c < 0x80)
2004         {
               /* ESC, SI and SO end the scan, which rejects the
                  category.  */
2005           if (c < 0x20
2006               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2007             break;
2008         }
2009       else
2010         {
               /* C is a leading byte; the table gives the length of
                  the whole sequence.  */
2011           int more_bytes = emacs_mule_bytes[c] - 1;
2012
2013           while (more_bytes > 0)
2014             {
2015               ONE_MORE_BYTE (c);
2016               if (c < 0xA0)
2017                 {
2018                   src--;        /* Unread the last byte.  */
2019                   break;
2020                 }
2021               more_bytes--;
2022             }
               /* The sequence was cut short by a byte below 0xA0.  */
2023           if (more_bytes != 0)
2024             break;
2025           found = CATEGORY_MASK_EMACS_MULE;
2026         }
2027     }
       /* Reached only by `break' from the loop above.  */
2028   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2029   return 0;
2030
2031  no_more_source:
       /* Bytes of an unfinished sequence remain and no further block
          will follow.  */
2032   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2033     {
2034       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2035       return 0;
2036     }
2037   detect_info->found |= found;
2038   return 1;
2039 }
2040
2041
2042 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2043    character.  If CMP_STATUS indicates that we must expect MSEQ or
2044    RULE described above, decode it and return the negative value of
2045    the decoded character or rule.  If an invalid byte is found, return
2046    -1.  If SRC is too short, return -2.

        On success, *NBYTES is set to the number of bytes consumed from
        SRC and *NCHARS to the number of characters consumed.  When a
        character is decoded and ID is non-null, *ID is set to the id of
        the charset the character was decoded with.  The end of the
        source is taken from CODING (CODING->source plus
        CODING->src_bytes).

        NOTE(review): ONE_MORE_BYTE and CODING_DECODE_CHAR are defined
        outside this view and use this function's local names;
        presumably ONE_MORE_BYTE jumps to `no_more_source' at end of
        input -- confirm.  */
2047
2048 int
2049 emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2050                  int *nbytes, int *nchars, int *id,
2051                  struct composition_status *cmp_status)
2052 {
2053   const unsigned char *src_end = coding->source + coding->src_bytes;
2054   const unsigned char *src_base = src;
2055   int multibytep = coding->src_multibyte;
2056   struct charset *charset;
2057   unsigned code;
2058   int c;
2059   int consumed_chars = 0;
2060   int mseq_found = 0;
2061
2062   ONE_MORE_BYTE (c);
2063   if (c < 0)
2064     {
           /* A negative value is returned negated, with the charset
              taken from slot 0 of emacs_mule_charset.  */
2065       c = -c;
2066       charset = emacs_mule_charset[0];
2067     }
2068   else
2069     {
2070       if (c >= 0xA0)
2071         {
               /* A first byte >= 0xA0 is accepted only inside an
                  old-form composition.  */
2072           if (cmp_status->state != COMPOSING_NO
2073               && cmp_status->old_form)
2074             {
2075               if (cmp_status->state == COMPOSING_CHAR)
2076                 {
                       /* MSEQ: 0xA0 introduces an ASCII character
                          stored as code + 0x80; any other byte is a
                          leading code + 0x20.  */
2077                   if (c == 0xA0)
2078                     {
2079                       ONE_MORE_BYTE (c);
2080                       c -= 0x80;
2081                       if (c < 0)
2082                         goto invalid_code;
2083                     }
2084                   else
2085                     c -= 0x20;
2086                   mseq_found = 1;
2087                 }
2088               else
2089                 {
                       /* Not expecting a character, so the byte is a
                          rule: return it negated.  */
2090                   *nbytes = src - src_base;
2091                   *nchars = consumed_chars;
2092                   return -c;
2093                 }
2094             }
2095           else
2096             goto invalid_code;
2097         }
2098
           /* Dispatch on the total length of the sequence led by C.  */
2099       switch (emacs_mule_bytes[c])
2100         {
2101         case 2:
               /* The leading byte selects the charset; one position
                  byte follows.  */
2102           if (! (charset = emacs_mule_charset[c]))
2103             goto invalid_code;
2104           ONE_MORE_BYTE (c);
2105           if (c < 0xA0)
2106             goto invalid_code;
2107           code = c & 0x7F;
2108           break;
2109
2110         case 3:
2111           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2112               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2113             {
                   /* Private leading code: the next byte selects the
                      charset, then one position byte follows.  */
2114               ONE_MORE_BYTE (c);
2115               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
2116                 goto invalid_code;
2117               ONE_MORE_BYTE (c);
2118               if (c < 0xA0)
2119                 goto invalid_code;
2120               code = c & 0x7F;
2121             }
2122           else
2123             {
                   /* The leading byte selects the charset; two
                      position bytes follow.  */
2124               if (! (charset = emacs_mule_charset[c]))
2125                 goto invalid_code;
2126               ONE_MORE_BYTE (c);
2127               if (c < 0xA0)
2128                 goto invalid_code;
2129               code = (c & 0x7F) << 8;
2130               ONE_MORE_BYTE (c);
2131               if (c < 0xA0)
2132                 goto invalid_code;
2133               code |= c & 0x7F;
2134             }
2135           break;
2136
2137         case 4:
               /* The second byte selects the charset; two position
                  bytes follow.  */
2138           ONE_MORE_BYTE (c);
2139           if (c < 0 || ! (charset = emacs_mule_charset[c]))
2140             goto invalid_code;
2141           ONE_MORE_BYTE (c);
2142           if (c < 0xA0)
2143             goto invalid_code;
2144           code = (c & 0x7F) << 8;
2145           ONE_MORE_BYTE (c);
2146           if (c < 0xA0)
2147             goto invalid_code;
2148           code |= c & 0x7F;
2149           break;
2150
2151         case 1:
               /* A single byte: ASCII, or otherwise a character of
                  the eight-bit charset.  */
2152           code = c;
2153           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2154                                      ? charset_ascii : charset_eight_bit);
2155           break;
2156
2157         default:
2158           abort ();
2159         }
2160       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, code, c);
2161       if (c < 0)
2162         goto invalid_code;
2163     }
2164   *nbytes = src - src_base;
2165   *nchars = consumed_chars;
2166   if (id)
2167     *id = charset->id;
       /* A character read as an MSEQ is reported negated.  */
2168   return (mseq_found ? -c : c);
2169
2170  no_more_source:
2171   return -2;
2172
2173  invalid_code:
2174   return -1;
2175 }
2176
2177
2178 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2179
2180 /* Handle these composition sequences ('|': the end of header elements,
2181    BYTES and CHARS >= 0xA0):
2182
2183    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2184    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2185    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2186
2187    and these old forms:
2188
2189    (4) relative composition: 0x80 | MSEQ ... MSEQ
2190    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2191
2192    When the starter 0x80 and the following header elements are found,
2193    this annotation header is produced.
2194
2195         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2196
2197    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2198    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2199
2200    Then, upon reading the following elements, these codes are produced
2201    until the composition end is found:
2202
2203    (1) CHAR ... CHAR
2204    (2) ALT ... ALT CHAR ... CHAR
2205    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2206    (4) CHAR ... CHAR
2207    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2208
2209    When the composition end is found, LENGTH and NCHARS in the
2210    annotation header are updated as below:
2211
2212    (1) LENGTH: unchanged, NCHARS: unchanged
2213    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2214    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2215    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2216    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2217
2218    If an error is found while composing, the annotation header is
2219    changed to the original composition header (plus filler -1s) as
2220    below:
2221
2222    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2223    (5)          [ 0x80 0xFF -1 -1 -1 ]
2224
2225    and the sequence [ -2 DECODED-RULE ] is changed to the original
2226    byte sequence as below:
2227         o the original byte sequence is B: [ B -1 ]
2228         o the original byte sequence is B1 B2: [ B1 B2 ]
2229
2230    Most of the routines are implemented by macros because many
2231    variables and labels in the caller decode_coding_emacs_mule must be
2232    accessible, and they are usually called just once (thus doesn't
2233    increase the size of compiled object).  */
2234
2235 /* Decode a composition rule represented by C as a component of
2236    composition sequence of Emacs 20 style.  Set RULE to the decoded
2237    rule.

        C is modified in place: 0xA0 is subtracted from it, and the
        result must lie in 0..80, otherwise control jumps to the
        `invalid_code' label of the expanding function.  The two
        reference points are C / 9 and C % 9, each with the value 4
        replaced by 10, and are combined by COMPOSITION_ENCODE_RULE.  */
2238
2239 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
2240   do {                                                  \
2241     int gref, nref;                                     \
2242                                                         \
2243     c -= 0xA0;                                          \
2244     if (c < 0 || c >= 81)                               \
2245       goto invalid_code;                                \
2246     gref = c / 9, nref = c % 9;                         \
2247     if (gref == 4) gref = 10;                           \
2248     if (nref == 4) nref = 10;                           \
2249     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2250   } while (0)
2251
2252
2253 /* Decode a composition rule represented by C and the following byte
2254    at SRC as a component of composition sequence of Emacs 21 style.
2255    Set RULE to the decoded rule.

        The first reference point is C - 0x20; the second is the next
        byte, read into C by ONE_MORE_BYTE, minus 0x20.  Each must lie
        in 0..80, otherwise control jumps to the `invalid_code' label of
        the expanding function.  The two values are combined by
        COMPOSITION_ENCODE_RULE.  */
2256
2257 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
2258   do {                                                  \
2259     int gref, nref;                                     \
2260                                                         \
2261     gref = c - 0x20;                                    \
2262     if (gref < 0 || gref >= 81)                         \
2263       goto invalid_code;                                \
2264     ONE_MORE_BYTE (c);                                  \
2265     nref = c - 0x20;                                    \
2266     if (nref < 0 || nref >= 81)                         \
2267       goto invalid_code;                                \
2268     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2269   } while (0)
2270
2271
2272 /* Start of Emacs 21 style format.  The first three bytes at SRC are
2273    (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is the
2274    byte length of this composition information, CHARS is the number of
2275    characters composed by this composition.

        As written, the macro takes the METHOD byte from the variable C
        of the expanding function (the method is C - 0xF2) and then
        reads the BYTES and CHARS bytes with ONE_MORE_BYTE.  Control
        jumps to `invalid_code' if BYTES - 0xA0 is less than 3, if the
        method is COMPOSITION_RELATIVE and BYTES - 0xA0 is not 4, or if
        CHARS - 0xA0 is not in 1..MAX_COMPOSITION_COMPONENTS - 1.

        On success *CMP_STATUS is initialized (new form; state
        COMPOSING_CHAR for relative composition, else
        COMPOSING_COMPONENT_CHAR; ncomps is BYTES - 0xA0 - 4) and the
        annotation header is added through ADD_COMPOSITION_DATA.
        NOTE(review): charbuf_base is not referenced by the visible
        text; check ADD_COMPOSITION_DATA before removing it.  */
2276
2277 #define DECODE_EMACS_MULE_21_COMPOSITION()                              \
2278   do {                                                                  \
2279     enum composition_method method = c - 0xF2;                          \
2280     int *charbuf_base = charbuf;                                        \
2281     int nbytes, nchars;                                                 \
2282                                                                         \
2283     ONE_MORE_BYTE (c);                                                  \
2284     if (c < 0)                                                          \
2285       goto invalid_code;                                                \
2286     nbytes = c - 0xA0;                                                  \
2287     if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4))  \
2288       goto invalid_code;                                                \
2289     ONE_MORE_BYTE (c);                                                  \
2290     nchars = c - 0xA0;                                                  \
2291     if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
2292       goto invalid_code;                                                \
2293     cmp_status->old_form = 0;                                           \
2294     cmp_status->method = method;                                        \
2295     if (method == COMPOSITION_RELATIVE)                                 \
2296       cmp_status->state = COMPOSING_CHAR;                               \
2297     else                                                                \
2298       cmp_status->state = COMPOSING_COMPONENT_CHAR;                     \
2299     cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
2300     cmp_status->nchars = nchars;                                        \
2301     cmp_status->ncomps = nbytes - 4;                                    \
2302     ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
2303   } while (0)
2304
2305
2306 /* Start of Emacs 20 style format for relative composition.

        Marks *CMP_STATUS as an old-form composition with method
        COMPOSITION_RELATIVE in state COMPOSING_CHAR, resets its
        character and component counts to 0, and adds an annotation
        header with NCHARS and NBYTES both 0 through
        ADD_COMPOSITION_DATA.  */
2307
2308 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
2309   do {                                                          \
2310     cmp_status->old_form = 1;                                   \
2311     cmp_status->method = COMPOSITION_RELATIVE;                  \
2312     cmp_status->state = COMPOSING_CHAR;                         \
2313     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2314     cmp_status->nchars = cmp_status->ncomps = 0;                \
2315     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2316   } while (0)
2317
2318
2319 /* Start of Emacs 20 style format for rule-base composition.

        Same as DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION except that
        the method recorded in *CMP_STATUS and in the annotation header
        is COMPOSITION_WITH_RULE.  */
2320
2321 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
2322   do {                                                          \
2323     cmp_status->old_form = 1;                                   \
2324     cmp_status->method = COMPOSITION_WITH_RULE;                 \
2325     cmp_status->state = COMPOSING_CHAR;                         \
2326     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2327     cmp_status->nchars = cmp_status->ncomps = 0;                \
2328     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2329   } while (0)
2330
2331
/* Read the next byte into C and start the composition it announces:

   - a byte whose offset from 0xF2 is a composition method between
     COMPOSITION_RELATIVE and COMPOSITION_WITH_RULE_ALTCHARS starts an
     Emacs 21 style composition;
   - a byte in 0xA0..0xBF starts an Emacs 20 style relative
     composition, and SRC is moved back so that the byte is read
     again as the first component;
   - 0xFF starts an Emacs 20 style rule-base composition.

   Any other byte, including a negative value from ONE_MORE_BYTE,
   makes control jump to the `invalid_code' label of the expanding
   function.  */
2332 #define DECODE_EMACS_MULE_COMPOSITION_START()           \
2333   do {                                                  \
2334     const unsigned char *current_src = src;             \
2335                                                         \
2336     ONE_MORE_BYTE (c);                                  \
2337     if (c < 0)                                          \
2338       goto invalid_code;                                \
2339     if (c - 0xF2 >= COMPOSITION_RELATIVE                \
2340         && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
2341       DECODE_EMACS_MULE_21_COMPOSITION ();              \
2342     else if (c < 0xA0)                                  \
2343       goto invalid_code;                                \
2344     else if (c < 0xC0)                                  \
2345       {                                                 \
2346         DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
2347         /* Re-read C as a composition component.  */    \
2348         src = current_src;                              \
2349       }                                                 \
2350     else if (c == 0xFF)                                 \
2351       DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();     \
2352     else                                                \
2353       goto invalid_code;                                \
2354   } while (0)
2355
/* Close the composition being decoded.  Its data is addressed
   relative to `charbuf' minus the length recorded in the composition
   status.  For the old (Emacs 20) form, store the number of composed
   characters in the third element; otherwise, for methods beyond
   COMPOSITION_RELATIVE, replace the first element with the third
   element minus that length.  Finally leave the composing state.
   Expects `cmp_status' and `charbuf' to be in scope.  */

#define EMACS_MULE_COMPOSITION_END()                                    \
  do {                                                                  \
    const int head = - cmp_status->length;                              \
                                                                        \
    if (! cmp_status->old_form)                                         \
      {                                                                 \
        if (cmp_status->method > COMPOSITION_RELATIVE)                  \
          charbuf[head] = charbuf[head + 2] - cmp_status->length;       \
      }                                                                 \
    else                                                                \
      charbuf[head + 2] = cmp_status->nchars;                           \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
2366
2367
2368 static int
2369 emacs_mule_finish_composition (int *charbuf,
2370                                struct composition_status *cmp_status)
2371 {
2372   int idx = - cmp_status->length;
2373   int new_chars;
2374
2375   if (cmp_status->old_form && cmp_status->nchars > 0)
2376     {
2377       charbuf[idx + 2] = cmp_status->nchars;
2378       new_chars = 0;
2379       if (cmp_status->method == COMPOSITION_WITH_RULE
2380           && cmp_status->state == COMPOSING_CHAR)
2381         {
2382           /* The last rule was invalid.  */
2383           int rule = charbuf[-1] + 0xA0;
2384
2385           charbuf[-2] = BYTE8_TO_CHAR (rule);
2386           charbuf[-1] = -1;
2387           new_chars = 1;
2388         }
2389     }
2390   else
2391     {
2392       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2393
2394       if (cmp_status->method == COMPOSITION_WITH_RULE)
2395         {
2396           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2397           charbuf[idx++] = -3;
2398           charbuf[idx++] = 0;
2399           new_chars = 1;
2400         }
2401       else
2402         {
2403           int nchars = charbuf[idx + 1] + 0xA0;
2404           int nbytes = charbuf[idx + 2] + 0xA0;
2405
2406           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2407           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2408           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2409           charbuf[idx++] = -1;
2410           new_chars = 4;
2411         }
2412     }
2413   cmp_status->state = COMPOSING_NO;
2414   return new_chars;
2415 }
2416
/* If a composition is still being decoded, terminate it with
   emacs_mule_finish_composition and advance `char_offset' by the
   value that function returns.  Expects `cmp_status', `charbuf' and
   `char_offset' to be in scope.  */

#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
  do {                                                                    \
    if (cmp_status->state == COMPOSING_NO)                                \
      break;                                                              \
    char_offset += emacs_mule_finish_composition (charbuf, cmp_status);   \
  } while (0)
2422
2423
/* Decode the emacs-mule encoded bytes of CODING, starting at
   CODING->source + CODING->consumed, into character codes and
   annotations stored in CODING->charbuf from index
   CODING->charbuf_used on.

   A composition left unfinished by the previous call is first
   restored from the carryover area of the composition status; one
   that is still unfinished at the end is saved there again, unless
   CODING_MODE_LAST_BLOCK is set, in which case it is finished off.
   At an invalid sequence, one byte is emitted (as is if ASCII,
   otherwise via BYTE8_TO_CHAR) and CODING->errors is incremented.

   On return, CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used are updated.  */
2424 static void
2425 decode_coding_emacs_mule (struct coding_system *coding)
2426 {
2427   const unsigned char *src = coding->source + coding->consumed;
2428   const unsigned char *src_end = coding->source + coding->src_bytes;
2429   const unsigned char *src_base;
2430   int *charbuf = coding->charbuf + coding->charbuf_used;
2431   /* We may produce two annotations (charset and composition) in one
2432      loop and one more charset annotation at the end.  */
2433   int *charbuf_end
2434     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2435   int consumed_chars = 0, consumed_chars_base;
2436   int multibytep = coding->src_multibyte;
2437   Lisp_Object attrs, charset_list;
2438   int char_offset = coding->produced_char;
2439   int last_offset = char_offset;
2440   int last_id = charset_ascii;
2441   int eol_crlf =
2442     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte read ahead after a CR when EOL_CRLF is set, or -1 if none.  */
2443   int byte_after_cr = -1;
2444   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2445
2446   CODING_GET_INFO (coding, attrs, charset_list);
2447
       /* Put back the elements of a composition that the previous call
          left unfinished.  */
2448   if (cmp_status->state != COMPOSING_NO)
2449     {
2450       int i;
2451
2452       for (i = 0; i < cmp_status->length; i++)
2453         *charbuf++ = cmp_status->carryover[i];
2454       coding->annotated = 1;
2455     }
2456
2457   while (1)
2458     {
2459       int c, id;
2460
           /* Remember where this iteration started; the `retry' and
              `invalid_code' paths below rewind to this point.  */
2461       src_base = src;
2462       consumed_chars_base = consumed_chars;
2463
2464       if (charbuf >= charbuf_end)
2465         {
               /* No room left.  Give back the byte that was read ahead
                  after a CR so that it is not counted as consumed.  */
2466           if (byte_after_cr >= 0)
2467             src_base--;
2468           break;
2469         }
2470
2471       if (byte_after_cr >= 0)
2472         c = byte_after_cr, byte_after_cr = -1;
2473       else
2474         ONE_MORE_BYTE (c);
2475
           /* NOTE(review): a negative C is stored negated as a character
              below; presumably ONE_MORE_BYTE returns a raw byte that way
              -- confirm against that macro.  0x80 introduces a
              composition.  */
2476       if (c < 0 || c == 0x80)
2477         {
2478           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2479           if (c < 0)
2480             {
2481               *charbuf++ = -c;
2482               char_offset++;
2483             }
2484           else
2485             DECODE_EMACS_MULE_COMPOSITION_START ();
2486           continue;
2487         }
2488
2489       if (c < 0x80)
2490         {
               /* With DOS-style EOL handling, read the byte after a CR
                  now; it becomes C on the next iteration.  */
2491           if (eol_crlf && c == '\r')
2492             ONE_MORE_BYTE (byte_after_cr);
2493           id = charset_ascii;
2494           if (cmp_status->state != COMPOSING_NO)
2495             {
2496               if (cmp_status->old_form)
2497                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2498               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2499                 cmp_status->ncomps--;
2500             }
2501         }
2502       else
2503         {
2504           int nchars, nbytes;
2505           /* emacs_mule_char can load a charset map from a file, which
2506              allocates a large structure and might cause buffer text
2507              to be relocated as result.  Thus, we need to remember the
2508              original pointer to buffer text, and fixup all related
2509              pointers after the call.  */
2510           const unsigned char *orig = coding->source;
2511           EMACS_INT offset;
2512
2513           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2514                                cmp_status);
2515           offset = coding->source - orig;
2516           if (offset)
2517             {
2518               src += offset;
2519               src_base += offset;
2520               src_end += offset;
2521             }
               /* -1 is handled as an invalid sequence and -2 stops
                  decoding here; other negative values fall through (see
                  the comment after this block).  */
2522           if (c < 0)
2523             {
2524               if (c == -1)
2525                 goto invalid_code;
2526               if (c == -2)
2527                 break;
2528             }
2529           src = src_base + nbytes;
2530           consumed_chars = consumed_chars_base + nchars;
2531           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2532             cmp_status->ncomps -= nchars;
2533         }
2534
2535       /* Now if C >= 0, we found a normally encoded character, if C <
2536          0, we found an old-style composition component character or
2537          rule.  */
2538
2539       if (cmp_status->state == COMPOSING_NO)
2540         {
               /* Plain character.  When the charset changes, emit a
                  charset annotation for the run that just ended (unless
                  that run was ASCII).  */
2541           if (last_id != id)
2542             {
2543               if (last_id != charset_ascii)
2544                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2545                                   last_id);
2546               last_id = id;
2547               last_offset = char_offset;
2548             }
2549           *charbuf++ = c;
2550           char_offset++;
2551         }
2552       else if (cmp_status->state == COMPOSING_CHAR)
2553         {
2554           if (cmp_status->old_form)
2555             {
2556               if (c >= 0)
2557                 {
2558                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2559                   *charbuf++ = c;
2560                   char_offset++;
2561                 }
2562               else
2563                 {
2564                   *charbuf++ = -c;
2565                   cmp_status->nchars++;
2566                   cmp_status->length++;
2567                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2568                     EMACS_MULE_COMPOSITION_END ();
2569                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2570                     cmp_status->state = COMPOSING_RULE;
2571                 }
2572             }
2573           else
2574             {
2575               *charbuf++ = c;
2576               cmp_status->length++;
2577               cmp_status->nchars--;
2578               if (cmp_status->nchars == 0)
2579                 EMACS_MULE_COMPOSITION_END ();
2580             }
2581         }
2582       else if (cmp_status->state == COMPOSING_RULE)
2583         {
2584           int rule;
2585
2586           if (c >= 0)
2587             {
2588               EMACS_MULE_COMPOSITION_END ();
2589               *charbuf++ = c;
2590               char_offset++;
2591             }
2592           else
2593             {
2594               c = -c;
2595               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2596               if (rule < 0)
2597                 goto invalid_code;
                   /* A rule is stored as the marker -2 followed by the
                      rule value.  */
2598               *charbuf++ = -2;
2599               *charbuf++ = rule;
2600               cmp_status->length += 2;
2601               cmp_status->state = COMPOSING_CHAR;
2602             }
2603         }
2604       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2605         {
2606           *charbuf++ = c;
2607           cmp_status->length++;
2608           if (cmp_status->ncomps == 0)
2609             cmp_status->state = COMPOSING_CHAR;
2610           else if (cmp_status->ncomps > 0)
2611             {
2612               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2613                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2614             }
2615           else
2616             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2617         }
2618       else                      /* COMPOSING_COMPONENT_RULE */
2619         {
2620           int rule;
2621
2622           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2623           if (rule < 0)
2624             goto invalid_code;
2625           *charbuf++ = -2;
2626           *charbuf++ = rule;
2627           cmp_status->length += 2;
2628           cmp_status->ncomps--;
2629           if (cmp_status->ncomps > 0)
2630             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2631           else
2632             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2633         }
2634       continue;
2635
           /* NOTE(review): no jump to `retry' is visible in this
              function; presumably one of the macros used above contains
              it -- confirm.  */
2636     retry:
2637       src = src_base;
2638       consumed_chars = consumed_chars_base;
2639       continue;
2640
           /* Abandon any composition in progress, rewind to the start of
              this iteration and emit just one byte of the bad
              sequence.  */
2641     invalid_code:
2642       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2643       src = src_base;
2644       consumed_chars = consumed_chars_base;
2645       ONE_MORE_BYTE (c);
2646       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2647       char_offset++;
2648       coding->errors++;
2649     }
2650
       /* NOTE(review): no goto to this label is visible here; presumably
          ONE_MORE_BYTE jumps to it when the source is exhausted --
          confirm.  */
2651  no_more_source:
2652   if (cmp_status->state != COMPOSING_NO)
2653     {
2654       if (coding->mode & CODING_MODE_LAST_BLOCK)
2655         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2656       else
2657         {
2658           int i;
2659
               /* Take the unfinished composition back out of CHARBUF and
                  keep it for the next call.  */
2660           charbuf -= cmp_status->length;
2661           for (i = 0; i < cmp_status->length; i++)
2662             cmp_status->carryover[i] = charbuf[i];
2663         }
2664     }
2665   if (last_id != charset_ascii)
2666     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
       /* Only what was consumed up to the start of the last (possibly
          incomplete) iteration is reported.  */
2667   coding->consumed_char += consumed_chars_base;
2668   coding->consumed = src_base - coding->source;
2669   coding->charbuf_used = charbuf - coding->charbuf;
2670 }
2671
2672
/* Store in CODES[0] and CODES[1] the leading code(s) that emacs-mule
   puts in front of a character of the charset whose emacs-mule ID is
   ID.  An ID below 0xA0 is its own single leading code, and CODES[1]
   is set to 0.  Otherwise CODES[0] is one of 0x9A..0x9D, selected by
   the range ID falls in, and CODES[1] is ID itself.

   The expansion is a single statement without a trailing semicolon,
   so `EMACS_MULE_LEADING_CODES (id, codes);' can be used anywhere a
   statement is allowed, including before `else'.  ID and CODES are
   evaluated more than once; do not pass expressions with side
   effects.  */

#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    if ((id) < 0xA0)                                    \
      (codes)[0] = (id), (codes)[1] = 0;                \
    else if ((id) < 0xE0)                               \
      (codes)[0] = 0x9A, (codes)[1] = (id);             \
    else if ((id) < 0xF0)                               \
      (codes)[0] = 0x9B, (codes)[1] = (id);             \
    else if ((id) < 0xF5)                               \
      (codes)[0] = 0x9C, (codes)[1] = (id);             \
    else                                                \
      (codes)[0] = 0x9D, (codes)[1] = (id);             \
  } while (0)
2686
2687
/* Encode the CODING->charbuf_used elements of CODING->charbuf into
   emacs-mule bytes stored from CODING->destination +
   CODING->produced on.

   A negative element starts an annotation.  A charset annotation
   selects a preferred charset (kept only if it is a member of the
   charset list); a composition annotation is skipped ("not yet
   implemented"); any other kind aborts.  ASCII characters, and
   characters satisfying CHAR_BYTE8_P after conversion with
   CHAR_TO_BYTE8, are emitted as one byte.  Any other character is
   emitted as the leading code(s) of its charset followed by its code
   with the 0x80 bit set in each byte (one byte for dimension 1, two
   bytes otherwise).  A character for which no charset is found is
   replaced by CODING->default_char.

   Record CODING_RESULT_SUCCESS, update CODING->produced_char and
   CODING->produced, and return 0.  */
2688 static int
2689 encode_coding_emacs_mule (struct coding_system *coding)
2690 {
2691   int multibytep = coding->dst_multibyte;
2692   int *charbuf = coding->charbuf;
2693   int *charbuf_end = charbuf + coding->charbuf_used;
2694   unsigned char *dst = coding->destination + coding->produced;
2695   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Passed to ASSURE_DESTINATION before each element is encoded.  */
2696   int safe_room = 8;
2697   int produced_chars = 0;
2698   Lisp_Object attrs, charset_list;
2699   int c;
       /* Charset ID taken from the latest charset annotation, or -1.  */
2700   int preferred_charset_id = -1;
2701
2702   CODING_GET_INFO (coding, attrs, charset_list);
       /* Make this coding system use Vemacs_mule_charset_list as its
          charset list.  */
2703   if (! EQ (charset_list, Vemacs_mule_charset_list))
2704     {
2705       CODING_ATTR_CHARSET_LIST (attrs)
2706         = charset_list = Vemacs_mule_charset_list;
2707     }
2708
2709   while (charbuf < charbuf_end)
2710     {
2711       ASSURE_DESTINATION (safe_room);
2712       c = *charbuf++;
2713
2714       if (c < 0)
2715         {
2716           /* Handle an annotation.  */
2717           switch (*charbuf)
2718             {
2719             case CODING_ANNOTATE_COMPOSITION_MASK:
2720               /* Not yet implemented.  */
2721               break;
2722             case CODING_ANNOTATE_CHARSET_MASK:
2723               preferred_charset_id = charbuf[3];
2724               if (preferred_charset_id >= 0
2725                   && NILP (Fmemq (make_number (preferred_charset_id),
2726                                   charset_list)))
2727                 preferred_charset_id = -1;
2728               break;
2729             default:
2730               abort ();
2731             }
               /* -C elements make up the annotation, C itself included;
                  skip the remaining ones.  */
2732           charbuf += -c - 1;
2733           continue;
2734         }
2735
2736       if (ASCII_CHAR_P (c))
2737         EMIT_ONE_ASCII_BYTE (c);
2738       else if (CHAR_BYTE8_P (c))
2739         {
2740           c = CHAR_TO_BYTE8 (c);
2741           EMIT_ONE_BYTE (c);
2742         }
2743       else
2744         {
2745           struct charset *charset;
2746           unsigned code;
2747           int dimension;
2748           int emacs_mule_id;
2749           unsigned char leading_codes[2];
2750
               /* Try the preferred charset first; fall back to searching
                  the whole charset list.  */
2751           if (preferred_charset_id >= 0)
2752             {
2753               charset = CHARSET_FROM_ID (preferred_charset_id);
2754               if (CHAR_CHARSET_P (c, charset))
2755                 code = ENCODE_CHAR (charset, c);
2756               else
2757                 charset = char_charset (c, charset_list, &code);
2758             }
2759           else
2760             charset = char_charset (c, charset_list, &code);
2761           if (! charset)
2762             {
2763               c = coding->default_char;
2764               if (ASCII_CHAR_P (c))
2765                 {
2766                   EMIT_ONE_ASCII_BYTE (c);
2767                   continue;
2768                 }
                   /* NOTE(review): if the default character is in no
                      charset of the list either, CHARSET stays null and
                      is presumably dereferenced just below -- confirm
                      this cannot happen.  */
2769               charset = char_charset (c, charset_list, &code);
2770             }
2771           dimension = CHARSET_DIMENSION (charset);
2772           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2773           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2774           EMIT_ONE_BYTE (leading_codes[0]);
               /* The second leading code is 0 when there is only one.  */
2775           if (leading_codes[1])
2776             EMIT_ONE_BYTE (leading_codes[1]);
2777           if (dimension == 1)
2778             EMIT_ONE_BYTE (code | 0x80);
2779           else
2780             {
2781               code |= 0x8080;
2782               EMIT_ONE_BYTE (code >> 8);
2783               EMIT_ONE_BYTE (code & 0xFF);
2784             }
2785         }
2786     }
2787   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2788   coding->produced_char += produced_chars;
2789   coding->produced = dst - coding->destination;
2790   return 0;
2791 }
2792
2793 \f
2794 /*** 7. ISO2022 handlers ***/
2795
2796 /* The following note describes the coding system ISO2022 briefly.
2797    Since the intention of this note is to help understand the
2798    functions in this file, some parts are NOT ACCURATE or are OVERLY
2799    SIMPLIFIED.  For thorough understanding, please refer to the
2800    original document of ISO2022.  This is equivalent to the standard
2801    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2802
2803    ISO2022 provides many mechanisms to encode several character sets
2804    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2805    is encoded using bytes less than 128.  This may make the encoded
2806    text a little bit longer, but the text passes more easily through
2807    several types of gateway, some of which strip off the MSB (Most
2808    Significant Bit).
2809
2810    There are two kinds of character sets: control character sets and
2811    graphic character sets.  The former contain control characters such
2812    as `newline' and `escape' to provide control functions (control
2813    functions are also provided by escape sequences).  The latter
2814    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2815    two control character sets and many graphic character sets.
2816
2817    Graphic character sets are classified into one of the following
2818    four classes, according to the number of bytes (DIMENSION) and
2819    number of characters in one dimension (CHARS) of the set:
2820    - DIMENSION1_CHARS94
2821    - DIMENSION1_CHARS96
2822    - DIMENSION2_CHARS94
2823    - DIMENSION2_CHARS96
2824
2825    In addition, each character set is assigned an identification tag,
2826    unique for each set, called the "final character" (denoted as <F>
2827    hereafter).  The <F> of each character set is decided by ECMA(*)
2828    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2829    (0x30..0x3F are for private use only).
2830
2831    Note (*): ECMA = European Computer Manufacturers Association
2832
2833    Here are examples of graphic character sets [NAME(<F>)]:
2834         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2835         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2836         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2837         o DIMENSION2_CHARS96 -- none for the moment
2838
2839    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2840         C0 [0x00..0x1F] -- control character plane 0
2841         GL [0x20..0x7F] -- graphic character plane 0
2842         C1 [0x80..0x9F] -- control character plane 1
2843         GR [0xA0..0xFF] -- graphic character plane 1
2844
2845    A control character set is directly designated and invoked to C0 or
2846    C1 by an escape sequence.  The most common case is that:
2847    - ISO646's  control character set is designated/invoked to C0, and
2848    - ISO6429's control character set is designated/invoked to C1,
2849    and usually these designations/invocations are omitted in encoded
2850    text.  In a 7-bit environment, only C0 can be used, and a control
2851    character for C1 is encoded by an appropriate escape sequence to
2852    fit into the environment.  All control characters for C1 are
2853    defined to have corresponding escape sequences.
2854
2855    A graphic character set is at first designated to one of four
2856    graphic registers (G0 through G3), then these graphic registers are
2857    invoked to GL or GR.  These designations and invocations can be
2858    done independently.  The most common case is that G0 is invoked to
2859    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2860    these invocations and designations are omitted in encoded text.
2861    In a 7-bit environment, only GL can be used.
2862
2863    When a graphic character set of CHARS94 is invoked to GL, codes
2864    0x20 and 0x7F of the GL area work as control characters SPACE and
2865    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2866    be used.
2867
2868    There are two ways of invocation: locking-shift and single-shift.
2869    With locking-shift, the invocation lasts until the next different
2870    invocation, whereas with single-shift, the invocation affects the
2871    following character only and doesn't affect the locking-shift
2872    state.  Invocations are done by the following control characters or
2873    escape sequences:
2874
2875    ----------------------------------------------------------------------
2876    abbrev  function                  cntrl escape seq   description
2877    ----------------------------------------------------------------------
2878    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2879    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2880    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2881    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2882    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2883    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2884    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2885    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2886    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2887    ----------------------------------------------------------------------
2888    (*) These are not used by any known coding system.
2889
2890    Control characters for these functions are defined by macros
2891    ISO_CODE_XXX in `coding.h'.
2892
2893    Designations are done by the following escape sequences:
2894    ----------------------------------------------------------------------
2895    escape sequence      description
2896    ----------------------------------------------------------------------
2897    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2898    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2899    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2900    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2901    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2902    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2903    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2904    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2905    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2906    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2907    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2908    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2909    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2910    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2911    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2912    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2913    ----------------------------------------------------------------------
2914
2915    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2916    of dimension 1, chars 94, and final character <F>, etc...
2917
2918    Note (*): Although these designations are not allowed in ISO2022,
2919    Emacs accepts them on decoding, and produces them on encoding
2920    CHARS96 character sets in a coding system which is characterized as
2921    7-bit environment, non-locking-shift, and non-single-shift.
2922
2923    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2924    '(' must be omitted.  We refer to this as "short-form" hereafter.
2925
2926    Now you may notice that there are a lot of ways of encoding the
2927    same multilingual text in ISO2022.  Actually, there exist many
2928    coding systems such as Compound Text (used in X11's inter-client
2929    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2930    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2931    localized platforms), and all of these are variants of ISO2022.
2932
2933    In addition to the above, Emacs handles two more kinds of escape
2934    sequences: ISO6429's direction specification and Emacs' private
2935    sequence for specifying character composition.
2936
2937    ISO6429's direction specification takes the following form:
2938         o CSI ']'      -- end of the current direction
2939         o CSI '0' ']'  -- end of the current direction
2940         o CSI '1' ']'  -- start of left-to-right text
2941         o CSI '2' ']'  -- start of right-to-left text
2942    The control character CSI (0x9B: control sequence introducer) is
2943    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2944
2945    Character composition specification takes the following form:
2946         o ESC '0' -- start relative composition
2947         o ESC '1' -- end composition
2948         o ESC '2' -- start rule-base composition (*)
2949         o ESC '3' -- start relative composition with alternate chars  (**)
2950         o ESC '4' -- start rule-base composition with alternate chars  (**)
2951   Since these are not standard escape sequences of any ISO standard,
2952   the use of them with these meanings is restricted to Emacs only.
2953
2954   (*) This form is used only in Emacs 20.7 and older versions,
2955   but newer versions can safely decode it.
2956   (**) This form is used only in Emacs 21.1 and newer versions,
2957   and older versions can't decode it.
2958
2959   Here's a list of example usages of these composition escape
2960   sequences (categorized by `enum composition_method').
2961
2962   COMPOSITION_RELATIVE:
2963         ESC 0 CHAR [ CHAR ] ESC 1
2964   COMPOSITION_WITH_RULE:
2965         ESC 2 CHAR [ RULE CHAR ] ESC 1
2966   COMPOSITION_WITH_ALTCHARS:
2967         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2968   COMPOSITION_WITH_RULE_ALTCHARS:
2969         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2970
/* Table of 256 `enum iso_code_class_type' entries.
   NOTE(review): presumably indexed by byte value and filled in
   elsewhere in this file -- confirm.  */
2971 enum iso_code_class_type iso_code_class[256];
2972
/* Return nonzero if the charset whose ID is ID may be used with
   CODING: ID must lie in [0, CODING->max_charset_id] and its entry in
   CODING->safe_charsets must not be 255, the filler value that
   setup_iso_safe_charsets leaves for charsets without a register.

   A negative ID is rejected rather than used as an array index;
   iso_charset_table lookups can yield such values for invalid
   designations.  ID is evaluated more than once.  */

#define SAFE_CHARSET_P(coding, id)              \
  ((id) >= 0                                    \
   && (id) <= (coding)->max_charset_id          \
   && (coding)->safe_charsets[id] != 255)
2976
2977
/* Nonzero if CODING_ISO_INITIAL (..., 1) of the coding system
   registered for CATEGORY in coding_categories is not negative.  */
#define SHIFT_OUT_OK(category)  \
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
2980
2981 static void
2982 setup_iso_safe_charsets (Lisp_Object attrs)
2983 {
2984   Lisp_Object charset_list, safe_charsets;
2985   Lisp_Object request;
2986   Lisp_Object reg_usage;
2987   Lisp_Object tail;
2988   int reg94, reg96;
2989   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2990   int max_charset_id;
2991
2992   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2993   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2994       && ! EQ (charset_list, Viso_2022_charset_list))
2995     {
2996       CODING_ATTR_CHARSET_LIST (attrs)
2997         = charset_list = Viso_2022_charset_list;
2998       ASET (attrs, coding_attr_safe_charsets, Qnil);
2999     }
3000
3001   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3002     return;
3003
3004   max_charset_id = 0;
3005   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3006     {
3007       int id = XINT (XCAR (tail));
3008       if (max_charset_id < id)
3009         max_charset_id = id;
3010     }
3011
3012   safe_charsets = make_uninit_string (max_charset_id + 1);
3013   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
3014   request = AREF (attrs, coding_attr_iso_request);
3015   reg_usage = AREF (attrs, coding_attr_iso_usage);
3016   reg94 = XINT (XCAR (reg_usage));
3017   reg96 = XINT (XCDR (reg_usage));
3018
3019   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3020     {
3021       Lisp_Object id;
3022       Lisp_Object reg;
3023       struct charset *charset;
3024
3025       id = XCAR (tail);
3026       charset = CHARSET_FROM_ID (XINT (id));
3027       reg = Fcdr (Fassq (id, request));
3028       if (! NILP (reg))
3029         SSET (safe_charsets, XINT (id), XINT (reg));
3030       else if (charset->iso_chars_96)
3031         {
3032           if (reg96 < 4)
3033             SSET (safe_charsets, XINT (id), reg96);
3034         }
3035       else
3036         {
3037           if (reg94 < 4)
3038             SSET (safe_charsets, XINT (id), reg94);
3039         }
3040     }
3041   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3042 }
3043
3044
3045 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3046    Check if a text is encoded in one of ISO-2022 based coding systems.
3047    If it is, return 1, else return 0.  */
3048
3049 static int
3050 detect_coding_iso_2022 (struct coding_system *coding,
3051                         struct coding_detection_info *detect_info)
3052 {
3053   const unsigned char *src = coding->source, *src_base = src;
3054   const unsigned char *src_end = coding->source + coding->src_bytes;
3055   int multibytep = coding->src_multibyte;
3056   int single_shifting = 0;
3057   int id;
3058   int c, c1;
3059   int consumed_chars = 0;
3060   int i;
3061   int rejected = 0;
3062   int found = 0;
3063   int composition_count = -1;
3064
3065   detect_info->checked |= CATEGORY_MASK_ISO;
3066
3067   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3068     {
3069       struct coding_system *this = &(coding_categories[i]);
3070       Lisp_Object attrs, val;
3071
3072       if (this->id < 0)
3073         continue;
3074       attrs = CODING_ID_ATTRS (this->id);
3075       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3076           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3077         setup_iso_safe_charsets (attrs);
3078       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3079       this->max_charset_id = SCHARS (val) - 1;
3080       this->safe_charsets = SDATA (val);
3081     }
3082
3083   /* A coding system of this category is always ASCII compatible.  */
3084   src += coding->head_ascii;
3085
3086   while (rejected != CATEGORY_MASK_ISO)
3087     {
3088       src_base = src;
3089       ONE_MORE_BYTE (c);
3090       switch (c)
3091         {
3092         case ISO_CODE_ESC:
3093           if (inhibit_iso_escape_detection)
3094             break;
3095           single_shifting = 0;
3096           ONE_MORE_BYTE (c);
3097           if (c >= '(' && c <= '/')
3098             {
3099               /* Designation sequence for a charset of dimension 1.  */
3100               ONE_MORE_BYTE (c1);
3101               if (c1 < ' ' || c1 >= 0x80
3102                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3103                 /* Invalid designation sequence.  Just ignore.  */
3104                 break;
3105             }
3106           else if (c == '$')
3107             {
3108               /* Designation sequence for a charset of dimension 2.  */
3109               ONE_MORE_BYTE (c);
3110               if (c >= '@' && c <= 'B')
3111                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3112                 id = iso_charset_table[1][0][c];
3113               else if (c >= '(' && c <= '/')
3114                 {
3115                   ONE_MORE_BYTE (c1);
3116                   if (c1 < ' ' || c1 >= 0x80
3117                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3118                     /* Invalid designation sequence.  Just ignore.  */
3119                     break;
3120                 }
3121               else
3122                 /* Invalid designation sequence.  Just ignore it.  */
3123                 break;
3124             }
3125           else if (c == 'N' || c == 'O')
3126             {
3127               /* ESC <Fe> for SS2 or SS3.  */
3128               single_shifting = 1;
3129               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3130               break;
3131             }
3132           else if (c == '1')
3133             {
3134               /* End of composition.  */
3135               if (composition_count < 0
3136                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3137                 /* Invalid */
3138                 break;
3139               composition_count = -1;
3140               found |= CATEGORY_MASK_ISO;
3141             }
3142           else if (c >= '0' && c <= '4')
3143             {
3144               /* ESC <Fp> for start/end composition.  */
3145               composition_count = 0;
3146               break;
3147             }
3148           else
3149             {
3150               /* Invalid escape sequence.  Just ignore it.  */
3151               break;
3152             }
3153
3154           /* We found a valid designation sequence for CHARSET.  */
3155           rejected |= CATEGORY_MASK_ISO_8BIT;
3156           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3157                               id))
3158             found |= CATEGORY_MASK_ISO_7;
3159           else
3160             rejected |= CATEGORY_MASK_ISO_7;
3161           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3162                               id))
3163             found |= CATEGORY_MASK_ISO_7_TIGHT;
3164           else
3165             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3166           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3167                               id))
3168             found |= CATEGORY_MASK_ISO_7_ELSE;
3169           else
3170             rejected |= CATEGORY_MASK_ISO_7_ELSE;
3171           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3172                               id))
3173             found |= CATEGORY_MASK_ISO_8_ELSE;
3174           else
3175             rejected |= CATEGORY_MASK_ISO_8_ELSE;
3176           break;
3177
3178         case ISO_CODE_SO:
3179         case ISO_CODE_SI:
3180           /* Locking shift out/in.  */
3181           if (inhibit_iso_escape_detection)
3182             break;
3183           single_shifting = 0;
3184           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3185           break;
3186
3187         case ISO_CODE_CSI:
3188           /* Control sequence introducer.  */
3189           single_shifting = 0;
3190           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3191           found |= CATEGORY_MASK_ISO_8_ELSE;
3192           goto check_extra_latin;
3193
3194         case ISO_CODE_SS2:
3195         case ISO_CODE_SS3:
3196           /* Single shift.   */
3197           if (inhibit_iso_escape_detection)
3198             break;
3199           single_shifting = 0;
3200           rejected |= CATEGORY_MASK_ISO_7BIT;
3201           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3202               & CODING_ISO_FLAG_SINGLE_SHIFT)
3203             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
3204           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3205               & CODING_ISO_FLAG_SINGLE_SHIFT)
3206             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3207           if (single_shifting)
3208             break;
3209           goto check_extra_latin;
3210
3211         default:
3212           if (c < 0)
3213             continue;
3214           if (c < 0x80)
3215             {
3216               if (composition_count >= 0)
3217                 composition_count++;
3218               single_shifting = 0;
3219               break;
3220             }
3221           if (c >= 0xA0)
3222             {
3223               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3224               found |= CATEGORY_MASK_ISO_8_1;
3225               /* Check the length of succeeding codes of the range
3226                  0xA0..0xFF.  If the byte length is even, we include
3227                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3228                  only when we are not single shifting.  */
3229               if (! single_shifting
3230                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3231                 {
3232                   int i = 1;
3233                   while (src < src_end)
3234                     {
3235                       src_base = src;
3236                       ONE_MORE_BYTE (c);
3237                       if (c < 0xA0)
3238                         {
3239                           src = src_base;
3240                           break;
3241                         }
3242                       i++;
3243                     }
3244
3245                   if (i & 1 && src < src_end)
3246                     {
3247                       rejected |= CATEGORY_MASK_ISO_8_2;
3248                       if (composition_count >= 0)
3249                         composition_count += i;
3250                     }
3251                   else
3252                     {
3253                       found |= CATEGORY_MASK_ISO_8_2;
3254                       if (composition_count >= 0)
3255                         composition_count += i / 2;
3256                     }
3257                 }
3258               break;
3259             }
3260         check_extra_latin:
3261           single_shifting = 0;
3262           if (! VECTORP (Vlatin_extra_code_table)
3263               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3264             {
3265               rejected = CATEGORY_MASK_ISO;
3266               break;
3267             }
3268           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3269               & CODING_ISO_FLAG_LATIN_EXTRA)
3270             found |= CATEGORY_MASK_ISO_8_1;
3271           else
3272             rejected |= CATEGORY_MASK_ISO_8_1;
3273           rejected |= CATEGORY_MASK_ISO_8_2;
3274         }
3275     }
3276   detect_info->rejected |= CATEGORY_MASK_ISO;
3277   return 0;
3278
3279  no_more_source:
3280   detect_info->rejected |= rejected;
3281   detect_info->found |= (found & ~rejected);
3282   return 1;
3283 }
3284
3285
/* Record in CODING (the variable `coding' at the expansion site) that
   the charset selected by DIM, CHARS_96 and the final byte FINAL is
   designated to graphic register REG.

   The designation is accepted only when FINAL is in the range
   '0'..127, ISO_CHARSET_TABLE yields a non-negative charset id for
   it, and SAFE_CHARSET_P accepts that id.  Otherwise the designation
   of REG is set to -2 and CHARS_96 is set to -1.

   For an accepted designation, jisx0201-roman is replaced by ASCII
   when CODING_ISO_FLAG_USE_ROMAN is set, and jisx0208-1978 by
   jisx0208 when CODING_ISO_FLAG_USE_OLDJIS is set, before the id is
   stored.

   CHARS_96 is set to -1 whenever the escape sequence should be kept:
   on an invalid designation, and when ASCII is designated to a REG
   whose previous designation was invalid (-2).  CHARS_96 must
   therefore be a modifiable lvalue.

   FINAL is evaluated exactly once, so it may be any expression.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    const int final_byte_ = (final);                                    \
    int id = -1;                                                        \
                                                                        \
    /* Only consult the table for a final byte inside its range.  */    \
    if (final_byte_ >= '0' && final_byte_ < 128)                        \
      id = ISO_CHARSET_TABLE (dim, chars_96, final_byte_);              \
    if (id < 0 || !SAFE_CHARSET_P (coding, id))                         \
      {                                                                 \
        /* Invalid designation: remember it and keep the sequence.  */  \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        (chars_96) = -1;                                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const int prev = CODING_ISO_DESIGNATION (coding, reg);          \
                                                                        \
        if (id == charset_jisx0201_roman)                               \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)  \
              id = charset_ascii;                                       \
          }                                                             \
        else if (id == charset_jisx0208_1978)                           \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
              id = charset_jisx0208;                                    \
          }                                                             \
        CODING_ISO_DESIGNATION (coding, reg) = id;                      \
        /* If there was an invalid designation to REG previously, and   \
           this designation is ASCII to REG, we should keep this        \
           designation sequence.  */                                    \
        if (prev == -2 && id == charset_ascii)                          \
          (chars_96) = -1;                                              \
      }                                                                 \
  } while (0)
3318
3319
3320 /* Handle these composition sequences (ALT: alternate char):
3321
3322    (1) relative composition: ESC 0 CHAR ... ESC 1
3323    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3324    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3325    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3326
3327    When the start sequence (ESC 0/2/3/4) is found, this annotation
3328    header is produced.
3329
3330         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3331
3332    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3333    produced until the end sequence (ESC 1) is found:
3334
3335    (1) CHAR ... CHAR
3336    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3337    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3338    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3339
3340    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3341    annotation header are updated as below:
3342
3343    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3344    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3345    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3346    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3347
3348    If an error is found while composing, the annotation header is
3349    changed to:
3350
3351         [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3352
3353    and the sequence [ -2 DECODED-RULE ] is changed to the original
3354    byte sequence as below:
3355         o the original byte sequence is B: [ B -1 ]
3356         o the original byte sequence is B1 B2: [ B1 B2 ]
3357    and the sequence [ -1 -1 ] is changed to the original byte
3358    sequence:
3359         [ ESC '0' ]
3360 */
3361
/* Decode the composition rule whose first byte is C1 (read from the
   expansion site), fetching one more source byte with ONE_MORE_BYTE
   when the rule uses the two-byte format.  RULE receives the encoded
   composition rule and NBYTES the number of bytes the rule occupied
   (1 or 2).  RULE is left negative when C1 is below 32 or when
   COMPOSITION_ENCODE_RULE yields a negative value; NBYTES is not
   touched when C1 is below 32.  */

#define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
  do {                                                                  \
    rule = c1 - 32;                                                     \
    if (rule >= 81)             /* new format (after ver.21) */         \
      {                                                                 \
        int second_byte;                                                \
                                                                        \
        ONE_MORE_BYTE (second_byte);                                    \
        rule = COMPOSITION_ENCODE_RULE (rule - 81, second_byte - 32);   \
        /* Offset valid rules to distinguish them from the old          \
           format.  */                                                  \
        if (rule >= 0)                                                  \
          rule += 0x100;                                                \
        nbytes = 2;                                                     \
      }                                                                 \
    else if (rule >= 0)         /* old format (before ver.21) */        \
      {                                                                 \
        int gref = (rule) / 9;                                          \
        int nref = (rule) % 9;                                          \
                                                                        \
        /* In the old format, reference point 4 stands for 10.  */      \
        if (gref == 4)                                                  \
          gref = 10;                                                    \
        if (nref == 4)                                                  \
          nref = 10;                                                    \
        rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
        nbytes = 1;                                                     \
      }                                                                 \
  } while (0)
3392
/* Turn the encoded composition rule RULE back into its byte form and
   store it in the two slots charbuf[idx] and charbuf[idx + 1], where
   `charbuf', `idx' and `new_chars' are variables of the expansion
   site.

   A rule below 0x100 (old format) becomes the single byte
   32 + GREF * 9 + NREF, with reference point 10 mapped back to 4; the
   second slot is set to -1 and `new_chars' grows by 1.  Any other
   rule (new format) becomes the two bytes 32 + 81 + GREF and
   32 + NREF, and `new_chars' grows by 2.

   RULE is evaluated exactly once, before either slot is written, so
   it may be any expression -- including charbuf[idx + 1] itself.  */
#define ENCODE_COMPOSITION_RULE(rule)                                   \
  do {                                                                  \
    const int encoded_rule_ = (rule);                                   \
    int gref = (encoded_rule_ % 0x100) / 12;                            \
    int nref = (encoded_rule_ % 0x100) % 12;                            \
                                                                        \
    if (encoded_rule_ < 0x100)          /* old format */                \
      {                                                                 \
        if (gref == 10) gref = 4;                                       \
        if (nref == 10) nref = 4;                                       \
        charbuf[idx] = 32 + gref * 9 + nref;                            \
        charbuf[idx + 1] = -1;                                          \
        new_chars++;                                                    \
      }                                                                 \
    else                                /* new format */                \
      {                                                                 \
        charbuf[idx] = 32 + 81 + gref;                                  \
        charbuf[idx + 1] = 32 + nref;                                   \
        new_chars += 2;                                                 \
      }                                                                 \
  } while (0)
3412
3413 /* Finish the current composition as invalid.  */
3414
3415 static int finish_composition (int *, struct composition_status *);
3416
3417 static int
3418 finish_composition (int *charbuf, struct composition_status *cmp_status)
3419 {
3420   int idx = - cmp_status->length;
3421   int new_chars;
3422
3423   /* Recover the original ESC sequence */
3424   charbuf[idx++] = ISO_CODE_ESC;
3425   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3426                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3427                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3428                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3429                     : '4');
3430   charbuf[idx++] = -2;
3431   charbuf[idx++] = 0;
3432   charbuf[idx++] = -1;
3433   new_chars = cmp_status->nchars;
3434   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3435     for (; idx < 0; idx++)
3436       {
3437         int elt = charbuf[idx];
3438
3439         if (elt == -2)
3440           {
3441             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3442             idx++;
3443           }
3444         else if (elt == -1)
3445           {
3446             charbuf[idx++] = ISO_CODE_ESC;
3447             charbuf[idx] = '0';
3448             new_chars += 2;
3449           }
3450       }
3451   cmp_status->state = COMPOSING_NO;
3452   return new_chars;
3453 }
3454
/* If characters are under composition (CMP_STATUS->state is not
   COMPOSING_NO), finish the composition with finish_composition and
   add the count it returns to CHAR_OFFSET.  `cmp_status', `charbuf'
   and `char_offset' are variables of the expansion site.  */
#define MAYBE_FINISH_COMPOSITION()                                      \
  do {                                                                  \
    if (! (cmp_status->state == COMPOSING_NO))                          \
      {                                                                 \
        int recovered_ = finish_composition (charbuf, cmp_status);      \
                                                                        \
        char_offset += recovered_;                                      \
      }                                                                 \
  } while (0)
3461
3462 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3463
3464    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3465    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3466    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3467    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3468
3469    Produce this annotation sequence now:
3470
3471    [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3472 */
3473
/* Process the final byte C1 ('0', '2', '3' or '4') of a composition
   start sequence.  `cmp_status', `charbuf' and `coding' are variables
   of the expansion site.

   When C1 is '0' and we are in the middle of the component part of
   an altchar composition (state COMPOSING_COMPONENT_CHAR with method
   COMPOSITION_WITH_ALTCHARS, or state COMPOSING_COMPONENT_RULE with
   method COMPOSITION_WITH_RULE_ALTCHARS), ESC 0 only separates the
   components from the composed characters: the marker [ -1 -1 ] is
   stored and the state becomes COMPOSING_CHAR.

   In every other case any composition still in progress is finished
   with MAYBE_FINISH_COMPOSITION and a new one is started: the method
   is chosen from C1, the annotation header is emitted with
   ADD_COMPOSITION_DATA, and the counters in CMP_STATUS are reset.

   C1 is evaluated exactly once, so it may be any expression.  */
#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    const int start_final_ = (c1);                                         \
                                                                           \
    if (start_final_ == '0'                                                \
        && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
             && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
            || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
                && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
      {                                                                    \
        /* End of the component part.  */                                  \
        *charbuf++ = -1;                                                   \
        *charbuf++ = -1;                                                   \
        cmp_status->state = COMPOSING_CHAR;                                \
        cmp_status->length += 2;                                           \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        MAYBE_FINISH_COMPOSITION ();                                       \
        cmp_status->method                                                 \
          = (start_final_ == '0' ? COMPOSITION_RELATIVE                    \
             : start_final_ == '2' ? COMPOSITION_WITH_RULE                 \
             : start_final_ == '3' ? COMPOSITION_WITH_ALTCHARS             \
             : COMPOSITION_WITH_RULE_ALTCHARS);                            \
        /* '3' and '4' begin with the component (ALT) part.  */            \
        cmp_status->state                                                  \
          = (start_final_ <= '2'                                           \
             ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);                 \
        ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
        cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
        cmp_status->nchars = cmp_status->ncomps = 0;                       \
        coding->annotated = 1;                                             \
      }                                                                    \
  } while (0)
3502
3503
/* Handle the composition end sequence ESC 1.  `cmp_status', `charbuf'
   and `char_offset' are variables of the expansion site.

   The sequence is rejected -- the pending composition is finished via
   MAYBE_FINISH_COMPOSITION and control jumps to `invalid_code' --
   when no CHAR has been stored yet, or when the conditions "state is
   COMPOSING_CHAR" and "method is COMPOSITION_WITH_RULE" are both true
   or both false.

   Otherwise the annotation header, which starts CMP_STATUS->length
   ints before CHARBUF, is completed: its first element is decreased
   by NCOMPS + 2 for COMPOSITION_WITH_ALTCHARS or by NCOMPS * 3 for
   COMPOSITION_WITH_RULE_ALTCHARS, its third element is set to NCHARS,
   CHAR_OFFSET advances by NCHARS, and the state returns to
   COMPOSING_NO.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int *cmp_header_;                                                   \
                                                                        \
    if (cmp_status->nchars == 0                                         \
        || ! ((cmp_status->state == COMPOSING_CHAR)                     \
              != (cmp_status->method == COMPOSITION_WITH_RULE)))        \
      {                                                                 \
        MAYBE_FINISH_COMPOSITION ();                                    \
        goto invalid_code;                                              \
      }                                                                 \
    cmp_header_ = charbuf - cmp_status->length;                         \
    if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)           \
      cmp_header_[0] -= cmp_status->ncomps * 3;                         \
    else if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
      cmp_header_[0] -= cmp_status->ncomps + 2;                         \
    cmp_header_[2] = cmp_status->nchars;                                \
    char_offset += cmp_status->nchars;                                  \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
3523
/* Store a composition rule RULE in charbuf, and update cmp_status.

   Two ints are appended at CHARBUF: the marker -2 followed by RULE.
   CMP_STATUS->length grows by 2 and CMP_STATUS->state is decremented
   by one.  `charbuf' and `cmp_status' are variables of the expansion
   site.  RULE is parenthesized in the expansion, matching
   STORE_COMPOSITION_CHAR.  */

#define STORE_COMPOSITION_RULE(rule)    \
  do {                                  \
    *charbuf++ = -2;                    \
    *charbuf++ = (rule);                \
    cmp_status->length += 2;            \
    cmp_status->state--;                \
  } while (0)
3533
/* Append C, a composed character or a component character, to
   charbuf and update cmp_status accordingly.

   CMP_STATUS->length grows by one.  C is counted in NCHARS when the
   state is COMPOSING_CHAR and in NCOMPS otherwise.  The state is then
   advanced by one when the method is COMPOSITION_WITH_RULE, or when
   the method is COMPOSITION_WITH_RULE_ALTCHARS and the state is
   COMPOSING_COMPONENT_CHAR.  `charbuf' and `cmp_status' are variables
   of the expansion site.  */

#define STORE_COMPOSITION_CHAR(c)                                       \
  do {                                                                  \
    *charbuf++ = (c);                                                   \
    cmp_status->length += 1;                                            \
    if (cmp_status->state != COMPOSING_CHAR)                            \
      cmp_status->ncomps += 1;                                          \
    else                                                                \
      cmp_status->nchars += 1;                                          \
    if (cmp_status->method == COMPOSITION_WITH_RULE)                    \
      cmp_status->state++;                                              \
    else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS       \
             && cmp_status->state == COMPOSING_COMPONENT_CHAR)          \
      cmp_status->state++;                                              \
  } while (0)
3550
3551
3552 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3553
3554 static void
3555 decode_coding_iso_2022 (struct coding_system *coding)
3556 {
3557   const unsigned char *src = coding->source + coding->consumed;
3558   const unsigned char *src_end = coding->source + coding->src_bytes;
3559   const unsigned char *src_base;
3560   int *charbuf = coding->charbuf + coding->charbuf_used;
3561   /* We may produce two annotations (charset and composition) in one
3562      loop and one more charset annotation at the end.  */
3563   int *charbuf_end
3564     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3565   int consumed_chars = 0, consumed_chars_base;
3566   int multibytep = coding->src_multibyte;
3567   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3568   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3569   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3570   int charset_id_2, charset_id_3;
3571   struct charset *charset;
3572   int c;
3573   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3574   Lisp_Object attrs, charset_list;
3575   int char_offset = coding->produced_char;
3576   int last_offset = char_offset;
3577   int last_id = charset_ascii;
3578   int eol_crlf =
3579     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3580   int byte_after_cr = -1;
3581   int i;
3582
3583   CODING_GET_INFO (coding, attrs, charset_list);
3584   setup_iso_safe_charsets (attrs);
3585   /* Charset list may have been changed.  */
3586   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3587   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3588
3589   if (cmp_status->state != COMPOSING_NO)
3590     {
3591       for (i = 0; i < cmp_status->length; i++)
3592         *charbuf++ = cmp_status->carryover[i];
3593       coding->annotated = 1;
3594     }
3595
3596   while (1)
3597     {
3598       int c1, c2, c3;
3599
3600       src_base = src;
3601       consumed_chars_base = consumed_chars;
3602
3603       if (charbuf >= charbuf_end)
3604         {
3605           if (byte_after_cr >= 0)
3606             src_base--;
3607           break;
3608         }
3609
3610       if (byte_after_cr >= 0)
3611         c1 = byte_after_cr, byte_after_cr = -1;
3612       else
3613         ONE_MORE_BYTE (c1);
3614       if (c1 < 0)
3615         goto invalid_code;
3616
3617       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3618         {
3619           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3620           char_offset++;
3621           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3622           continue;
3623         }
3624
3625       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3626         {
3627           if (c1 == ISO_CODE_ESC)
3628             {
3629               if (src + 1 >= src_end)
3630                 goto no_more_source;
3631               *charbuf++ = ISO_CODE_ESC;
3632               char_offset++;
3633               if (src[0] == '%' && src[1] == '@')
3634                 {
3635                   src += 2;
3636                   consumed_chars += 2;
3637                   char_offset += 2;
3638                   /* We are sure charbuf can contain two more chars. */
3639                   *charbuf++ = '%';
3640                   *charbuf++ = '@';
3641                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3642                 }
3643             }
3644           else
3645             {
3646               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3647               char_offset++;
3648             }
3649           continue;
3650         }
3651
3652       if ((cmp_status->state == COMPOSING_RULE
3653            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3654           && c1 != ISO_CODE_ESC)
3655         {
3656           int rule, nbytes;
3657
3658           DECODE_COMPOSITION_RULE (rule, nbytes);
3659           if (rule < 0)
3660             goto invalid_code;
3661           STORE_COMPOSITION_RULE (rule);
3662           continue;
3663         }
3664
3665       /* We produce at most one character.  */
3666       switch (iso_code_class [c1])
3667         {
3668         case ISO_0x20_or_0x7F:
3669           if (charset_id_0 < 0
3670               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3671             /* This is SPACE or DEL.  */
3672             charset = CHARSET_FROM_ID (charset_ascii);
3673           else
3674             charset = CHARSET_FROM_ID (charset_id_0);
3675           break;
3676
3677         case ISO_graphic_plane_0:
3678           if (charset_id_0 < 0)
3679             charset = CHARSET_FROM_ID (charset_ascii);
3680           else
3681             charset = CHARSET_FROM_ID (charset_id_0);
3682           break;
3683
3684         case ISO_0xA0_or_0xFF:
3685           if (charset_id_1 < 0
3686               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3687               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3688             goto invalid_code;
3689           /* This is a graphic character, we fall down ... */
3690
3691         case ISO_graphic_plane_1:
3692           if (charset_id_1 < 0)
3693             goto invalid_code;
3694           charset = CHARSET_FROM_ID (charset_id_1);
3695           break;
3696
3697         case ISO_control_0:
3698           if (eol_crlf && c1 == '\r')
3699             ONE_MORE_BYTE (byte_after_cr);
3700           MAYBE_FINISH_COMPOSITION ();
3701           charset = CHARSET_FROM_ID (charset_ascii);
3702           break;
3703
3704         case ISO_control_1:
3705           goto invalid_code;
3706
3707         case ISO_shift_out:
3708           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3709               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3710             goto invalid_code;
3711           CODING_ISO_INVOCATION (coding, 0) = 1;
3712           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3713           continue;
3714
3715         case ISO_shift_in:
3716           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3717             goto invalid_code;
3718           CODING_ISO_INVOCATION (coding, 0) = 0;
3719           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3720           continue;
3721
3722         case ISO_single_shift_2_7:
3723           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3724             goto invalid_code;
3725         case ISO_single_shift_2:
3726           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3727             goto invalid_code;
3728           /* SS2 is handled as an escape sequence of ESC 'N' */
3729           c1 = 'N';
3730           goto label_escape_sequence;
3731
3732         case ISO_single_shift_3:
3733           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3734             goto invalid_code;
3735           /* SS3 is handled as an escape sequence of ESC 'O' */
3736           c1 = 'O';
3737           goto label_escape_sequence;
3738
3739         case ISO_control_sequence_introducer:
3740           /* CSI is handled as an escape sequence of ESC '[' ...  */
3741           c1 = '[';
3742           goto label_escape_sequence;
3743
3744         case ISO_escape:
3745           ONE_MORE_BYTE (c1);
3746         label_escape_sequence:
3747           /* Escape sequences handled here are invocation,
3748              designation, direction specification, and character
3749              composition specification.  */
3750           switch (c1)
3751             {
3752             case '&':           /* revision of following character set */
3753               ONE_MORE_BYTE (c1);
3754               if (!(c1 >= '@' && c1 <= '~'))
3755                 goto invalid_code;
3756               ONE_MORE_BYTE (c1);
3757               if (c1 != ISO_CODE_ESC)
3758                 goto invalid_code;
3759               ONE_MORE_BYTE (c1);
3760               goto label_escape_sequence;
3761
3762             case '$':           /* designation of 2-byte character set */
3763               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3764                 goto invalid_code;
3765               {
3766                 int reg, chars96;
3767
3768                 ONE_MORE_BYTE (c1);
3769                 if (c1 >= '@' && c1 <= 'B')
3770                   {     /* designation of JISX0208.1978, GB2312.1980,
3771                            or JISX0208.1980 */
3772                     reg = 0, chars96 = 0;
3773                   }
3774                 else if (c1 >= 0x28 && c1 <= 0x2B)
3775                   { /* designation of DIMENSION2_CHARS94 character set */
3776                     reg = c1 - 0x28, chars96 = 0;
3777                     ONE_MORE_BYTE (c1);
3778                   }
3779                 else if (c1 >= 0x2C && c1 <= 0x2F)
3780                   { /* designation of DIMENSION2_CHARS96 character set */
3781                     reg = c1 - 0x2C, chars96 = 1;
3782                     ONE_MORE_BYTE (c1);
3783                   }
3784                 else
3785                   goto invalid_code;
3786                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3787                 /* We must update these variables now.  */
3788                 if (reg == 0)
3789                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3790                 else if (reg == 1)
3791                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3792                 if (chars96 < 0)
3793                   goto invalid_code;
3794               }
3795               continue;
3796
3797             case 'n':           /* invocation of locking-shift-2 */
3798               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3799                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3800                 goto invalid_code;
3801               CODING_ISO_INVOCATION (coding, 0) = 2;
3802               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3803               continue;
3804
3805             case 'o':           /* invocation of locking-shift-3 */
3806               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3807                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3808                 goto invalid_code;
3809               CODING_ISO_INVOCATION (coding, 0) = 3;
3810               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3811               continue;
3812
3813             case 'N':           /* invocation of single-shift-2 */
3814               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3815                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3816                 goto invalid_code;
3817               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3818               if (charset_id_2 < 0)
3819                 charset = CHARSET_FROM_ID (charset_ascii);
3820               else
3821                 charset = CHARSET_FROM_ID (charset_id_2);
3822               ONE_MORE_BYTE (c1);
3823               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3824                 goto invalid_code;
3825               break;
3826
3827             case 'O':           /* invocation of single-shift-3 */
3828               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3829                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3830                 goto invalid_code;
3831               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3832               if (charset_id_3 < 0)
3833                 charset = CHARSET_FROM_ID (charset_ascii);
3834               else
3835                 charset = CHARSET_FROM_ID (charset_id_3);
3836               ONE_MORE_BYTE (c1);
3837               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3838                 goto invalid_code;
3839               break;
3840
3841             case '0': case '2': case '3': case '4': /* start composition */
3842               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3843                 goto invalid_code;
3844               if (last_id != charset_ascii)
3845                 {
3846                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3847                   last_id = charset_ascii;
3848                   last_offset = char_offset;
3849                 }
3850               DECODE_COMPOSITION_START (c1);
3851               continue;
3852
3853             case '1':           /* end composition */
3854               if (cmp_status->state == COMPOSING_NO)
3855                 goto invalid_code;
3856               DECODE_COMPOSITION_END ();
3857               continue;
3858
3859             case '[':           /* specification of direction */
3860               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3861                 goto invalid_code;
3862               /* For the moment, nested direction is not supported.
3863                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3864                  left-to-right, and nozero means right-to-left.  */
3865               ONE_MORE_BYTE (c1);
3866               switch (c1)
3867                 {
3868                 case ']':       /* end of the current direction */
3869                   coding->mode &= ~CODING_MODE_DIRECTION;
3870
3871                 case '0':       /* end of the current direction */
3872                 case '1':       /* start of left-to-right direction */
3873                   ONE_MORE_BYTE (c1);
3874                   if (c1 == ']')
3875                     coding->mode &= ~CODING_MODE_DIRECTION;
3876                   else
3877                     goto invalid_code;
3878                   break;
3879
3880                 case '2':       /* start of right-to-left direction */
3881                   ONE_MORE_BYTE (c1);
3882                   if (c1 == ']')
3883                     coding->mode |= CODING_MODE_DIRECTION;
3884                   else
3885                     goto invalid_code;
3886                   break;
3887
3888                 default:
3889                   goto invalid_code;
3890                 }
3891               continue;
3892
3893             case '%':
3894               ONE_MORE_BYTE (c1);
3895               if (c1 == '/')
3896                 {
3897                   /* CTEXT extended segment:
3898                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3899                      We keep these bytes as is for the moment.
3900                      They may be decoded by post-read-conversion.  */
3901                   int dim, M, L;
3902                   int size;
3903
3904                   ONE_MORE_BYTE (dim);
3905                   if (dim < '0' || dim > '4')
3906                     goto invalid_code;
3907                   ONE_MORE_BYTE (M);
3908                   if (M < 128)
3909                     goto invalid_code;
3910                   ONE_MORE_BYTE (L);
3911                   if (L < 128)
3912                     goto invalid_code;
3913                   size = ((M - 128) * 128) + (L - 128);
3914                   if (charbuf + 6 > charbuf_end)
3915                     goto break_loop;
3916                   *charbuf++ = ISO_CODE_ESC;
3917                   *charbuf++ = '%';
3918                   *charbuf++ = '/';
3919                   *charbuf++ = dim;
3920                   *charbuf++ = BYTE8_TO_CHAR (M);
3921                   *charbuf++ = BYTE8_TO_CHAR (L);
3922                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3923                 }
3924               else if (c1 == 'G')
3925                 {
3926                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3927                      ESC % G --UTF-8-BYTES-- ESC % @
3928                      We keep these bytes as is for the moment.
3929                      They may be decoded by post-read-conversion.  */
3930                   if (charbuf + 3 > charbuf_end)
3931                     goto break_loop;
3932                   *charbuf++ = ISO_CODE_ESC;
3933                   *charbuf++ = '%';
3934                   *charbuf++ = 'G';
3935                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3936                 }
3937               else
3938                 goto invalid_code;
3939               continue;
3940               break;
3941
3942             default:
3943               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3944                 goto invalid_code;
3945               {
3946                 int reg, chars96;
3947
3948                 if (c1 >= 0x28 && c1 <= 0x2B)
3949                   { /* designation of DIMENSION1_CHARS94 character set */
3950                     reg = c1 - 0x28, chars96 = 0;
3951                     ONE_MORE_BYTE (c1);
3952                   }
3953                 else if (c1 >= 0x2C && c1 <= 0x2F)
3954                   { /* designation of DIMENSION1_CHARS96 character set */
3955                     reg = c1 - 0x2C, chars96 = 1;
3956                     ONE_MORE_BYTE (c1);
3957                   }
3958                 else
3959                   goto invalid_code;
3960                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3961                 /* We must update these variables now.  */
3962                 if (reg == 0)
3963                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3964                 else if (reg == 1)
3965                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3966                 if (chars96 < 0)
3967                   goto invalid_code;
3968               }
3969               continue;
3970             }
3971         }
3972
3973       if (cmp_status->state == COMPOSING_NO
3974           && charset->id != charset_ascii
3975           && last_id != charset->id)
3976         {
3977           if (last_id != charset_ascii)
3978             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3979           last_id = charset->id;
3980           last_offset = char_offset;
3981         }
3982
3983       /* Now we know CHARSET and 1st position code C1 of a character.
3984          Produce a decoded character while getting 2nd and 3rd
3985          position codes C2, C3 if necessary.  */
3986       if (CHARSET_DIMENSION (charset) > 1)
3987         {
3988           ONE_MORE_BYTE (c2);
3989           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3990               || ((c1 & 0x80) != (c2 & 0x80)))
3991             /* C2 is not in a valid range.  */
3992             goto invalid_code;
3993           if (CHARSET_DIMENSION (charset) == 2)
3994             c1 = (c1 << 8) | c2;
3995           else
3996             {
3997               ONE_MORE_BYTE (c3);
3998               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3999                   || ((c1 & 0x80) != (c3 & 0x80)))
4000                 /* C3 is not in a valid range.  */
4001                 goto invalid_code;
4002               c1 = (c1 << 16) | (c2 << 8) | c2;
4003             }
4004         }
4005       c1 &= 0x7F7F7F;
4006       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4007       if (c < 0)
4008         {
4009           MAYBE_FINISH_COMPOSITION ();
4010           for (; src_base < src; src_base++, char_offset++)
4011             {
4012               if (ASCII_BYTE_P (*src_base))
4013                 *charbuf++ = *src_base;
4014               else
4015                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4016             }
4017         }
4018       else if (cmp_status->state == COMPOSING_NO)
4019         {
4020           *charbuf++ = c;
4021           char_offset++;
4022         }
4023       else if ((cmp_status->state == COMPOSING_CHAR
4024                 ? cmp_status->nchars
4025                 : cmp_status->ncomps)
4026                >= MAX_COMPOSITION_COMPONENTS)
4027         {
4028           /* Too long composition.  */
4029           MAYBE_FINISH_COMPOSITION ();
4030           *charbuf++ = c;
4031           char_offset++;
4032         }
4033       else
4034         STORE_COMPOSITION_CHAR (c);
4035       continue;
4036
4037     invalid_code:
4038       MAYBE_FINISH_COMPOSITION ();
4039       src = src_base;
4040       consumed_chars = consumed_chars_base;
4041       ONE_MORE_BYTE (c);
4042       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4043       char_offset++;
4044       coding->errors++;
4045       continue;
4046
4047     break_loop:
4048       break;
4049     }
4050
4051  no_more_source:
4052   if (cmp_status->state != COMPOSING_NO)
4053     {
4054       if (coding->mode & CODING_MODE_LAST_BLOCK)
4055         MAYBE_FINISH_COMPOSITION ();
4056       else
4057         {
4058           charbuf -= cmp_status->length;
4059           for (i = 0; i < cmp_status->length; i++)
4060             cmp_status->carryover[i] = charbuf[i];
4061         }
4062     }
4063   else if (last_id != charset_ascii)
4064     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4065   coding->consumed_char += consumed_chars_base;
4066   coding->consumed = src_base - coding->source;
4067   coding->charbuf_used = charbuf - coding->charbuf;
4068 }
4069
4070
4071 /* ISO2022 encoding stuff.  */
4072
4073 /*
4074    It is not enough to say just "ISO2022" on encoding, we have to
4075    specify more details.  In Emacs, each coding system of ISO2022
4076    variant has the following specifications:
4077         1. Initial designation to G0 thru G3.
4078         2. Allows short-form designation?
4079         3. ASCII should be designated to G0 before control characters?
4080         4. ASCII should be designated to G0 at end of line?
4081         5. 7-bit environment or 8-bit environment?
4082         6. Use locking-shift?
4083         7. Use Single-shift?
4084    And the following two are only for Japanese:
4085         8. Use ASCII in place of JIS0201-1976-Roman?
4086         9. Use JISX0208-1983 in place of JISX0208-1978?
4087    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4088    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4089    details.
4090 */
4091
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advancing DST, and record the new designation in
   CODING.  When CODING has CODING_ISO_FLAG_REVISION set and CHARSET
   has a non-negative revision number, the sequence is preceded by
   ESC '&' <'@' + revision>.  A multi-dimension 94-character set whose
   <final-char> is '@', 'A', or 'B' and which goes to register 0 gets
   the short form (no intermediate byte after '$') unless CODING has
   CODING_ISO_FLAG_LONG_FORM set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    int chars_96 = CHARSET_ISO_CHARS_96 (charset) ? 1 : 0;              \
    /* Intermediate byte for each graphic register: "()*+" for a        \
       94-character set, ",-./" for a 96-character set.  */             \
    const char *intermediates = chars_96 ? ",-./" : "()*+";             \
    int revision                                                        \
      = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)         \
         ? CHARSET_ISO_REVISION (charset) : -1);                        \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        int short_form                                                  \
          = (! chars_96                                                 \
             && ! (CODING_ISO_FLAGS (coding)                            \
                   & CODING_ISO_FLAG_LONG_FORM)                         \
             && reg == 0                                                \
             && final_char >= '@' && final_char <= 'B');                \
                                                                        \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (! short_form)                                               \
          EMIT_ONE_ASCII_BYTE (intermediates[reg]);                     \
      }                                                                 \
    else                                                                \
      EMIT_ONE_ASCII_BYTE (intermediates[reg]);                         \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4139
4140
/* The following two macros produce codes (control character or escape
   sequence) for ISO2022 single-shift functions (single-shift-2 and
   single-shift-3).  Both use the variable `coding' of the expansion
   site and set CODING_ISO_SINGLE_SHIFTING (coding).  */

/* Single-shift-2: the single byte ISO_CODE_SS2, or ESC 'N' when
   CODING_ISO_FLAG_SEVEN_BITS is set.  */
#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4153
4154
/* Single-shift-3: the single byte ISO_CODE_SS3, or ESC 'O' when
   CODING_ISO_FLAG_SEVEN_BITS is set.  Uses the variable `coding' of
   the expansion site and sets CODING_ISO_SINGLE_SHIFTING (coding).  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
    else                                                                \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4163
4164
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions (shift-in,
   shift-out, locking-shift-2, and locking-shift-3).  Each one names
   the variable `coding' of the expansion site directly and stores the
   number of the newly invoked graphic register in
   CODING_ISO_INVOCATION (coding, 0).  */

/* Shift-in: emit the control character ISO_CODE_SI and record graphic
   register 0 as the one invoked to graphic plane 0.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)
4174
4175
/* Shift-out: emit the control character ISO_CODE_SO and record graphic
   register 1 as the one invoked to graphic plane 0.  Uses the variable
   `coding' of the expansion site.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)
4181
4182
/* Locking-shift-2: emit the escape sequence ESC 'n' and record graphic
   register 2 as the one invoked to graphic plane 0.  Uses the variable
   `coding' of the expansion site.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)
4188
4189
/* Locking-shift-3: emit the escape sequence ESC 'o' and record graphic
   register 3 as the one invoked to graphic plane 0.  The final byte
   must be 'o': ESC 'n' is locking-shift-2, and the ISO2022 decoder in
   this file accepts ESC 'o' as locking-shift-3.  Uses the variable
   `coding' of the expansion site.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
4195
4196
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING_ISO_FLAG_USE_ROMAN is set
   and CHARSET is ASCII, it is replaced by the charset whose id is
   charset_jisx0201_roman.  The macro uses the variables `coding',
   `dst' and `produced_chars' of the expansion site.  C1 is
   parenthesized wherever it is used, so it may be any expression.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single-shift code has just been produced; this one         \
           character completes it.  */                                  \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        /* Invoked to graphic plane 0: 8th bit clear.  */               \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        /* Invoked to graphic plane 1: 8th bit set.  */                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4239
4240
/* Emit the two bytes of a DIMENSION2 character of CHARSET whose
   position-codes are C1 and C2, first producing whatever designation
   and invocation codes are still needed.

   CHARSET must be an lvalue: when CODING_ISO_FLAG_USE_OLDJIS is set
   and CHARSET is the one whose id is charset_jisx0208, it is replaced
   by the charset whose id is charset_jisx0208_1978.  The macro uses
   the variables `coding', `dst' and `produced_chars' of the expansion
   site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (! CODING_ISO_SINGLE_SHIFTING (coding))                          \
      {                                                                 \
        if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))               \
          {                                                             \
            /* Invoked to graphic plane 0: 8th bits clear.  */          \
            EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);            \
            break;                                                      \
          }                                                             \
        if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))               \
          {                                                             \
            /* Invoked to graphic plane 1: 8th bits set.  */            \
            EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                  \
            break;                                                      \
          }                                                             \
        /* CHARSET is not invoked to either graphic plane yet.          \
           Designate and/or invoke it, then go around again to emit     \
           the character itself.  */                                    \
        dst = encode_invocation_designation (charset, coding, dst,      \
                                             &produced_chars);          \
        continue;                                                       \
      }                                                                 \
                                                                        \
    /* A single-shift code has just been produced; this one character   \
       completes it.  */                                                \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                  \
    else                                                                \
      EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                        \
    CODING_ISO_SINGLE_SHIFTING (coding) = 0;                            \
    break;                                                              \
  } while (1)
4283
4284
/* Encode character C of CHARSET: obtain its code with ENCODE_CHAR and
   hand it to the DIMENSION1 macro when CHARSET has dimension 1;
   otherwise split the code into its second-lowest and lowest bytes
   and hand those to the DIMENSION2 macro.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset), (c));                               \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
4294
4295
4296 /* Produce designation and invocation codes at a place pointed by DST
4297    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4298    Return new DST.  */
4299
4300 unsigned char *
4301 encode_invocation_designation (struct charset *charset,
4302                                struct coding_system *coding,
4303                                unsigned char *dst, int *p_nchars)
4304 {
4305   int multibytep = coding->dst_multibyte;
4306   int produced_chars = *p_nchars;
4307   int reg;                      /* graphic register number */
4308   int id = CHARSET_ID (charset);
4309
4310   /* At first, check designations.  */
4311   for (reg = 0; reg < 4; reg++)
4312     if (id == CODING_ISO_DESIGNATION (coding, reg))
4313       break;
4314
4315   if (reg >= 4)
4316     {
4317       /* CHARSET is not yet designated to any graphic registers.  */
4318       /* At first check the requested designation.  */
4319       reg = CODING_ISO_REQUEST (coding, id);
4320       if (reg < 0)
4321         /* Since CHARSET requests no special designation, designate it
4322            to graphic register 0.  */
4323         reg = 0;
4324
4325       ENCODE_DESIGNATION (charset, reg, coding);
4326     }
4327
4328   if (CODING_ISO_INVOCATION (coding, 0) != reg
4329       && CODING_ISO_INVOCATION (coding, 1) != reg)
4330     {
4331       /* Since the graphic register REG is not invoked to any graphic
4332          planes, invoke it to graphic plane 0.  */
4333       switch (reg)
4334         {
4335         case 0:                 /* graphic register 0 */
4336           ENCODE_SHIFT_IN;
4337           break;
4338
4339         case 1:                 /* graphic register 1 */
4340           ENCODE_SHIFT_OUT;
4341           break;
4342
4343         case 2:                 /* graphic register 2 */
4344           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4345             ENCODE_SINGLE_SHIFT_2;
4346           else
4347             ENCODE_LOCKING_SHIFT_2;
4348           break;
4349
4350         case 3:                 /* graphic register 3 */
4351           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4352             ENCODE_SINGLE_SHIFT_3;
4353           else
4354             ENCODE_LOCKING_SHIFT_3;
4355           break;
4356         }
4357     }
4358
4359   *p_nchars = produced_chars;
4360   return dst;
4361 }
4362
/* The following three macros produce codes for indicating direction
   of text.  */

/* Emit a control sequence introducer: ESC '[' when
   CODING_ISO_FLAG_SEVEN_BITS is set in the flags of `coding' (a
   variable of the expansion site), otherwise the single byte
   ISO_CODE_CSI.  The flag is tested with `&', as everywhere else in
   this file, because CODING_ISO_FLAGS holds several flag bits at
   once.  This is an object-like macro: use it without an argument
   list.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
4372
4373
/* Emit the control sequence that starts right-to-left text: a control
   sequence introducer followed by '2' ']'.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used without an argument list; writing `(dst)' after it would
   leave stray tokens behind its `while (0)'.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
4379
4380
/* Emit the control sequence that returns to left-to-right text: a
   control sequence introducer followed by '0' ']'.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used without an argument list; writing `(dst)' after it would
   leave stray tokens behind its `while (0)'.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4386
4387
/* Bring the graphic planes and registers back to their initial state:
   shift in if graphic plane 0 does not hold graphic register 0, then
   re-designate every register whose current charset differs from its
   initial one.  Uses the variable `coding' of the expansion site.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    struct charset *charset;                                            \
    int reg = 0;                                                        \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
    while (reg < 4)                                                     \
      {                                                                 \
        int initial = CODING_ISO_INITIAL (coding, reg);                 \
                                                                        \
        /* Registers with a negative initial charset id are left        \
           alone, as are those already holding their initial one.  */   \
        if (initial >= 0                                                \
            && CODING_ISO_DESIGNATION (coding, reg) != initial)         \
          {                                                             \
            charset = CHARSET_FROM_ID (initial);                        \
            ENCODE_DESIGNATION (charset, reg, coding);                  \
          }                                                             \
        reg++;                                                          \
      }                                                                 \
  } while (0)
4406
4407
4408 /* Produce designation sequences of charsets in the line started from
4409    SRC to a place pointed by DST, and return updated DST.
4410
4411    If the current block ends before any end-of-line, we may fail to
4412    find all the necessary designations.  */
4413
4414 static unsigned char *
4415 encode_designation_at_bol (struct coding_system *coding, int *charbuf,
4416                            int *charbuf_end, unsigned char *dst)
4417 {
4418   struct charset *charset;
4419   /* Table of charsets to be designated to each graphic register.  */
4420   int r[4];
4421   int c, found = 0, reg;
4422   int produced_chars = 0;
4423   int multibytep = coding->dst_multibyte;
4424   Lisp_Object attrs;
4425   Lisp_Object charset_list;
4426
4427   attrs = CODING_ID_ATTRS (coding->id);
4428   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4429   if (EQ (charset_list, Qiso_2022))
4430     charset_list = Viso_2022_charset_list;
4431
4432   for (reg = 0; reg < 4; reg++)
4433     r[reg] = -1;
4434
4435   while (found < 4)
4436     {
4437       int id;
4438
4439       c = *charbuf++;
4440       if (c == '\n')
4441         break;
4442       charset = char_charset (c, charset_list, NULL);
4443       id = CHARSET_ID (charset);
4444       reg = CODING_ISO_REQUEST (coding, id);
4445       if (reg >= 0 && r[reg] < 0)
4446         {
4447           found++;
4448           r[reg] = id;
4449         }
4450     }
4451
4452   if (found)
4453     {
4454       for (reg = 0; reg < 4; reg++)
4455         if (r[reg] >= 0
4456             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4457           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4458     }
4459
4460   return dst;
4461 }
4462
4463 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4464
4465 static int
4466 encode_coding_iso_2022 (struct coding_system *coding)
4467 {
4468   int multibytep = coding->dst_multibyte;
4469   int *charbuf = coding->charbuf;
4470   int *charbuf_end = charbuf + coding->charbuf_used;
4471   unsigned char *dst = coding->destination + coding->produced;
4472   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4473   int safe_room = 16;
4474   int bol_designation
4475     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4476        && CODING_ISO_BOL (coding));
4477   int produced_chars = 0;
4478   Lisp_Object attrs, eol_type, charset_list;
4479   int ascii_compatible;
4480   int c;
4481   int preferred_charset_id = -1;
4482
4483   CODING_GET_INFO (coding, attrs, charset_list);
4484   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4485   if (VECTORP (eol_type))
4486     eol_type = Qunix;
4487
4488   setup_iso_safe_charsets (attrs);
4489   /* Charset list may have been changed.  */
4490   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4491   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4492
4493   ascii_compatible
4494     = (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
4495        && ! (CODING_ISO_FLAGS (coding) & (CODING_ISO_FLAG_DESIGNATION
4496                                           | CODING_ISO_FLAG_LOCKING_SHIFT)));
4497
4498   while (charbuf < charbuf_end)
4499     {
4500       ASSURE_DESTINATION (safe_room);
4501
4502       if (bol_designation)
4503         {
4504           unsigned char *dst_prev = dst;
4505
4506           /* We have to produce designation sequences if any now.  */
4507           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4508           bol_designation = 0;
4509           /* We are sure that designation sequences are all ASCII bytes.  */
4510           produced_chars += dst - dst_prev;
4511         }
4512
4513       c = *charbuf++;
4514
4515       if (c < 0)
4516         {
4517           /* Handle an annotation.  */
4518           switch (*charbuf)
4519             {
4520             case CODING_ANNOTATE_COMPOSITION_MASK:
4521               /* Not yet implemented.  */
4522               break;
4523             case CODING_ANNOTATE_CHARSET_MASK:
4524               preferred_charset_id = charbuf[2];
4525               if (preferred_charset_id >= 0
4526                   && NILP (Fmemq (make_number (preferred_charset_id),
4527                                   charset_list)))
4528                 preferred_charset_id = -1;
4529               break;
4530             default:
4531               abort ();
4532             }
4533           charbuf += -c - 1;
4534           continue;
4535         }
4536
4537       /* Now encode the character C.  */
4538       if (c < 0x20 || c == 0x7F)
4539         {
4540           if (c == '\n'
4541               || (c == '\r' && EQ (eol_type, Qmac)))
4542             {
4543               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4544                 ENCODE_RESET_PLANE_AND_REGISTER ();
4545               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4546                 {
4547                   int i;
4548
4549                   for (i = 0; i < 4; i++)
4550                     CODING_ISO_DESIGNATION (coding, i)
4551                       = CODING_ISO_INITIAL (coding, i);
4552                 }
4553               bol_designation
4554                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4555             }
4556           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4557             ENCODE_RESET_PLANE_AND_REGISTER ();
4558           EMIT_ONE_ASCII_BYTE (c);
4559         }
4560       else if (ASCII_CHAR_P (c))
4561         {
4562           if (ascii_compatible)
4563             EMIT_ONE_ASCII_BYTE (c);
4564           else
4565             {
4566               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4567               ENCODE_ISO_CHARACTER (charset, c);
4568             }
4569         }
4570       else if (CHAR_BYTE8_P (c))
4571         {
4572           c = CHAR_TO_BYTE8 (c);
4573           EMIT_ONE_BYTE (c);
4574         }
4575       else
4576         {
4577           struct charset *charset;
4578
4579           if (preferred_charset_id >= 0)
4580             {
4581               charset = CHARSET_FROM_ID (preferred_charset_id);
4582               if (! CHAR_CHARSET_P (c, charset))
4583                 charset = char_charset (c, charset_list, NULL);
4584             }
4585           else
4586             charset = char_charset (c, charset_list, NULL);
4587           if (!charset)
4588             {
4589               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4590                 {
4591                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4592                   charset = CHARSET_FROM_ID (charset_ascii);
4593                 }
4594               else
4595                 {
4596                   c = coding->default_char;
4597                   charset = char_charset (c, charset_list, NULL);
4598                 }
4599             }
4600           ENCODE_ISO_CHARACTER (charset, c);
4601         }
4602     }
4603
4604   if (coding->mode & CODING_MODE_LAST_BLOCK
4605       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4606     {
4607       ASSURE_DESTINATION (safe_room);
4608       ENCODE_RESET_PLANE_AND_REGISTER ();
4609     }
4610   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4611   CODING_ISO_BOL (coding) = bol_designation;
4612   coding->produced_char += produced_chars;
4613   coding->produced = dst - coding->destination;
4614   return 0;
4615 }
4616
4617 \f
4618 /*** 8,9. SJIS and BIG5 handlers ***/
4619
4620 /* Although SJIS and BIG5 are not ISO's coding system, they are used
4621    quite widely.  So, for the moment, Emacs supports them in the bare
4622    C code.  But, in the future, they may be supported only by CCL.  */
4623
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but its two position-codes are divided and
   shifted so that they fit in the ranges below.

   --- CODE RANGE of SJIS ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   KATAKANA-JISX0201    0xA0 .. 0xDF
   JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
            (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
   -------------------------------

*/
4640
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and each of its characters is encoded in two bytes.

   --- CODE RANGE of BIG5 ---
   (character set)      (range)
   ASCII                0x00 .. 0x7F
   Big5 (1st byte)      0xA1 .. 0xFE
        (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
   --------------------------

  */
4653
4654 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4655    Check if a text is encoded in SJIS.  If it is, return
4656    CATEGORY_MASK_SJIS, else return 0.  */
4657
4658 static int
4659 detect_coding_sjis (struct coding_system *coding,
4660                     struct coding_detection_info *detect_info)
4661 {
4662   const unsigned char *src = coding->source, *src_base;
4663   const unsigned char *src_end = coding->source + coding->src_bytes;
4664   int multibytep = coding->src_multibyte;
4665   int consumed_chars = 0;
4666   int found = 0;
4667   int c;
4668   Lisp_Object attrs, charset_list;
4669   int max_first_byte_of_2_byte_code;
4670
4671   CODING_GET_INFO (coding, attrs, charset_list);
4672   max_first_byte_of_2_byte_code
4673     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4674
4675   detect_info->checked |= CATEGORY_MASK_SJIS;
4676   /* A coding system of this category is always ASCII compatible.  */
4677   src += coding->head_ascii;
4678
4679   while (1)
4680     {
4681       src_base = src;
4682       ONE_MORE_BYTE (c);
4683       if (c < 0x80)
4684         continue;
4685       if ((c >= 0x81 && c <= 0x9F)
4686           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4687         {
4688           ONE_MORE_BYTE (c);
4689           if (c < 0x40 || c == 0x7F || c > 0xFC)
4690             break;
4691           found = CATEGORY_MASK_SJIS;
4692         }
4693       else if (c >= 0xA0 && c < 0xE0)
4694         found = CATEGORY_MASK_SJIS;
4695       else
4696         break;
4697     }
4698   detect_info->rejected |= CATEGORY_MASK_SJIS;
4699   return 0;
4700
4701  no_more_source:
4702   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4703     {
4704       detect_info->rejected |= CATEGORY_MASK_SJIS;
4705       return 0;
4706     }
4707   detect_info->found |= found;
4708   return 1;
4709 }
4710
4711 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4712    Check if a text is encoded in BIG5.  If it is, return
4713    CATEGORY_MASK_BIG5, else return 0.  */
4714
4715 static int
4716 detect_coding_big5 (struct coding_system *coding,
4717                     struct coding_detection_info *detect_info)
4718 {
4719   const unsigned char *src = coding->source, *src_base;
4720   const unsigned char *src_end = coding->source + coding->src_bytes;
4721   int multibytep = coding->src_multibyte;
4722   int consumed_chars = 0;
4723   int found = 0;
4724   int c;
4725
4726   detect_info->checked |= CATEGORY_MASK_BIG5;
4727   /* A coding system of this category is always ASCII compatible.  */
4728   src += coding->head_ascii;
4729
4730   while (1)
4731     {
4732       src_base = src;
4733       ONE_MORE_BYTE (c);
4734       if (c < 0x80)
4735         continue;
4736       if (c >= 0xA1)
4737         {
4738           ONE_MORE_BYTE (c);
4739           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4740             return 0;
4741           found = CATEGORY_MASK_BIG5;
4742         }
4743       else
4744         break;
4745     }
4746   detect_info->rejected |= CATEGORY_MASK_BIG5;
4747   return 0;
4748
4749  no_more_source:
4750   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4751     {
4752       detect_info->rejected |= CATEGORY_MASK_BIG5;
4753       return 0;
4754     }
4755   detect_info->found |= found;
4756   return 1;
4757 }
4758
4759 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4760    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4761
4762 static void
4763 decode_coding_sjis (struct coding_system *coding)
4764 {
4765   const unsigned char *src = coding->source + coding->consumed;
4766   const unsigned char *src_end = coding->source + coding->src_bytes;
4767   const unsigned char *src_base;
4768   int *charbuf = coding->charbuf + coding->charbuf_used;
4769   /* We may produce one charset annocation in one loop and one more at
4770      the end.  */
4771   int *charbuf_end
4772     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4773   int consumed_chars = 0, consumed_chars_base;
4774   int multibytep = coding->src_multibyte;
4775   struct charset *charset_roman, *charset_kanji, *charset_kana;
4776   struct charset *charset_kanji2;
4777   Lisp_Object attrs, charset_list, val;
4778   int char_offset = coding->produced_char;
4779   int last_offset = char_offset;
4780   int last_id = charset_ascii;
4781   int eol_crlf =
4782     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4783   int byte_after_cr = -1;
4784
4785   CODING_GET_INFO (coding, attrs, charset_list);
4786
4787   val = charset_list;
4788   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4789   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4790   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4791   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4792
4793   while (1)
4794     {
4795       int c, c1;
4796       struct charset *charset;
4797
4798       src_base = src;
4799       consumed_chars_base = consumed_chars;
4800
4801       if (charbuf >= charbuf_end)
4802         {
4803           if (byte_after_cr >= 0)
4804             src_base--;
4805           break;
4806         }
4807
4808       if (byte_after_cr >= 0)
4809         c = byte_after_cr, byte_after_cr = -1;
4810       else
4811         ONE_MORE_BYTE (c);
4812       if (c < 0)
4813         goto invalid_code;
4814       if (c < 0x80)
4815         {
4816           if (eol_crlf && c == '\r')
4817             ONE_MORE_BYTE (byte_after_cr);
4818           charset = charset_roman;
4819         }
4820       else if (c == 0x80 || c == 0xA0)
4821         goto invalid_code;
4822       else if (c >= 0xA1 && c <= 0xDF)
4823         {
4824           /* SJIS -> JISX0201-Kana */
4825           c &= 0x7F;
4826           charset = charset_kana;
4827         }
4828       else if (c <= 0xEF)
4829         {
4830           /* SJIS -> JISX0208 */
4831           ONE_MORE_BYTE (c1);
4832           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4833             goto invalid_code;
4834           c = (c << 8) | c1;
4835           SJIS_TO_JIS (c);
4836           charset = charset_kanji;
4837         }
4838       else if (c <= 0xFC && charset_kanji2)
4839         {
4840           /* SJIS -> JISX0213-2 */
4841           ONE_MORE_BYTE (c1);
4842           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4843             goto invalid_code;
4844           c = (c << 8) | c1;
4845           SJIS_TO_JIS2 (c);
4846           charset = charset_kanji2;
4847         }
4848       else
4849         goto invalid_code;
4850       if (charset->id != charset_ascii
4851           && last_id != charset->id)
4852         {
4853           if (last_id != charset_ascii)
4854             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4855           last_id = charset->id;
4856           last_offset = char_offset;
4857         }
4858       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4859       *charbuf++ = c;
4860       char_offset++;
4861       continue;
4862
4863     invalid_code:
4864       src = src_base;
4865       consumed_chars = consumed_chars_base;
4866       ONE_MORE_BYTE (c);
4867       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4868       char_offset++;
4869       coding->errors++;
4870     }
4871
4872  no_more_source:
4873   if (last_id != charset_ascii)
4874     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4875   coding->consumed_char += consumed_chars_base;
4876   coding->consumed = src_base - coding->source;
4877   coding->charbuf_used = charbuf - coding->charbuf;
4878 }
4879
4880 static void
4881 decode_coding_big5 (struct coding_system *coding)
4882 {
4883   const unsigned char *src = coding->source + coding->consumed;
4884   const unsigned char *src_end = coding->source + coding->src_bytes;
4885   const unsigned char *src_base;
4886   int *charbuf = coding->charbuf + coding->charbuf_used;
4887   /* We may produce one charset annocation in one loop and one more at
4888      the end.  */
4889   int *charbuf_end
4890     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4891   int consumed_chars = 0, consumed_chars_base;
4892   int multibytep = coding->src_multibyte;
4893   struct charset *charset_roman, *charset_big5;
4894   Lisp_Object attrs, charset_list, val;
4895   int char_offset = coding->produced_char;
4896   int last_offset = char_offset;
4897   int last_id = charset_ascii;
4898   int eol_crlf =
4899     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4900   int byte_after_cr = -1;
4901
4902   CODING_GET_INFO (coding, attrs, charset_list);
4903   val = charset_list;
4904   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4905   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4906
4907   while (1)
4908     {
4909       int c, c1;
4910       struct charset *charset;
4911
4912       src_base = src;
4913       consumed_chars_base = consumed_chars;
4914
4915       if (charbuf >= charbuf_end)
4916         {
4917           if (byte_after_cr >= 0)
4918             src_base--;
4919           break;
4920         }
4921
4922       if (byte_after_cr >= 0)
4923         c = byte_after_cr, byte_after_cr = -1;
4924       else
4925         ONE_MORE_BYTE (c);
4926
4927       if (c < 0)
4928         goto invalid_code;
4929       if (c < 0x80)
4930         {
4931           if (eol_crlf && c == '\r')
4932             ONE_MORE_BYTE (byte_after_cr);
4933           charset = charset_roman;
4934         }
4935       else
4936         {
4937           /* BIG5 -> Big5 */
4938           if (c < 0xA1 || c > 0xFE)
4939             goto invalid_code;
4940           ONE_MORE_BYTE (c1);
4941           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4942             goto invalid_code;
4943           c = c << 8 | c1;
4944           charset = charset_big5;
4945         }
4946       if (charset->id != charset_ascii
4947           && last_id != charset->id)
4948         {
4949           if (last_id != charset_ascii)
4950             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4951           last_id = charset->id;
4952           last_offset = char_offset;
4953         }
4954       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4955       *charbuf++ = c;
4956       char_offset++;
4957       continue;
4958
4959     invalid_code:
4960       src = src_base;
4961       consumed_chars = consumed_chars_base;
4962       ONE_MORE_BYTE (c);
4963       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4964       char_offset++;
4965       coding->errors++;
4966     }
4967
4968  no_more_source:
4969   if (last_id != charset_ascii)
4970     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4971   coding->consumed_char += consumed_chars_base;
4972   coding->consumed = src_base - coding->source;
4973   coding->charbuf_used = charbuf - coding->charbuf;
4974 }
4975
4976 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4977    This function can encode charsets `ascii', `katakana-jisx0201',
4978    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4979    are sure that all these charsets are registered as official charset
4980    (i.e. do not have extended leading-codes).  Characters of other
4981    charsets are produced without any encoding.  If SJIS_P is 1, encode
4982    SJIS text, else encode BIG5 text.  */
4983
4984 static int
4985 encode_coding_sjis (struct coding_system *coding)
4986 {
4987   int multibytep = coding->dst_multibyte;
4988   int *charbuf = coding->charbuf;
4989   int *charbuf_end = charbuf + coding->charbuf_used;
4990   unsigned char *dst = coding->destination + coding->produced;
4991   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4992   int safe_room = 4;
4993   int produced_chars = 0;
4994   Lisp_Object attrs, charset_list, val;
4995   int ascii_compatible;
4996   struct charset *charset_roman, *charset_kanji, *charset_kana;
4997   struct charset *charset_kanji2;
4998   int c;
4999
5000   CODING_GET_INFO (coding, attrs, charset_list);
5001   val = charset_list;
5002   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5003   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5004   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5005   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
5006
5007   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5008
5009   while (charbuf < charbuf_end)
5010     {
5011       ASSURE_DESTINATION (safe_room);
5012       c = *charbuf++;
5013       /* Now encode the character C.  */
5014       if (ASCII_CHAR_P (c) && ascii_compatible)
5015         EMIT_ONE_ASCII_BYTE (c);
5016       else if (CHAR_BYTE8_P (c))
5017         {
5018           c = CHAR_TO_BYTE8 (c);
5019           EMIT_ONE_BYTE (c);
5020         }
5021       else
5022         {
5023           unsigned code;
5024           struct charset *charset = char_charset (c, charset_list, &code);
5025
5026           if (!charset)
5027             {
5028               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5029                 {
5030                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5031                   charset = CHARSET_FROM_ID (charset_ascii);
5032                 }
5033               else
5034                 {
5035                   c = coding->default_char;
5036                   charset = char_charset (c, charset_list, &code);
5037                 }
5038             }
5039           if (code == CHARSET_INVALID_CODE (charset))
5040             abort ();
5041           if (charset == charset_kanji)
5042             {
5043               int c1, c2;
5044               JIS_TO_SJIS (code);
5045               c1 = code >> 8, c2 = code & 0xFF;
5046               EMIT_TWO_BYTES (c1, c2);
5047             }
5048           else if (charset == charset_kana)
5049             EMIT_ONE_BYTE (code | 0x80);
5050           else if (charset_kanji2 && charset == charset_kanji2)
5051             {
5052               int c1, c2;
5053
5054               c1 = code >> 8;
5055               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5056                   || c1 == 0x28
5057                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5058                 {
5059                   JIS_TO_SJIS2 (code);
5060                   c1 = code >> 8, c2 = code & 0xFF;
5061                   EMIT_TWO_BYTES (c1, c2);
5062                 }
5063               else
5064                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5065             }
5066           else
5067             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5068         }
5069     }
5070   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5071   coding->produced_char += produced_chars;
5072   coding->produced = dst - coding->destination;
5073   return 0;
5074 }
5075
5076 static int
5077 encode_coding_big5 (struct coding_system *coding)
5078 {
5079   int multibytep = coding->dst_multibyte;
5080   int *charbuf = coding->charbuf;
5081   int *charbuf_end = charbuf + coding->charbuf_used;
5082   unsigned char *dst = coding->destination + coding->produced;
5083   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5084   int safe_room = 4;
5085   int produced_chars = 0;
5086   Lisp_Object attrs, charset_list, val;
5087   int ascii_compatible;
5088   struct charset *charset_roman, *charset_big5;
5089   int c;
5090
5091   CODING_GET_INFO (coding, attrs, charset_list);
5092   val = charset_list;
5093   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5094   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5095   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5096
5097   while (charbuf < charbuf_end)
5098     {
5099       ASSURE_DESTINATION (safe_room);
5100       c = *charbuf++;
5101       /* Now encode the character C.  */
5102       if (ASCII_CHAR_P (c) && ascii_compatible)
5103         EMIT_ONE_ASCII_BYTE (c);
5104       else if (CHAR_BYTE8_P (c))
5105         {
5106           c = CHAR_TO_BYTE8 (c);
5107           EMIT_ONE_BYTE (c);
5108         }
5109       else
5110         {
5111           unsigned code;
5112           struct charset *charset = char_charset (c, charset_list, &code);
5113
5114           if (! charset)
5115             {
5116               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5117                 {
5118                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5119                   charset = CHARSET_FROM_ID (charset_ascii);
5120                 }
5121               else
5122                 {
5123                   c = coding->default_char;
5124                   charset = char_charset (c, charset_list, &code);
5125                 }
5126             }
5127           if (code == CHARSET_INVALID_CODE (charset))
5128             abort ();
5129           if (charset == charset_big5)
5130             {
5131               int c1, c2;
5132
5133               c1 = code >> 8, c2 = code & 0xFF;
5134               EMIT_TWO_BYTES (c1, c2);
5135             }
5136           else
5137             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5138         }
5139     }
5140   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5141   coding->produced_char += produced_chars;
5142   coding->produced = dst - coding->destination;
5143   return 0;
5144 }
5145
5146 \f
5147 /*** 10. CCL handlers ***/
5148
5149 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5150    Check if a text is encoded in a coding system of which
5151    encoder/decoder are written in CCL program.  If it is, return
5152    CATEGORY_MASK_CCL, else return 0.  */
5153
5154 static int
5155 detect_coding_ccl (struct coding_system *coding,
5156                    struct coding_detection_info *detect_info)
5157 {
5158   const unsigned char *src = coding->source, *src_base;
5159   const unsigned char *src_end = coding->source + coding->src_bytes;
5160   int multibytep = coding->src_multibyte;
5161   int consumed_chars = 0;
5162   int found = 0;
5163   unsigned char *valids;
5164   int head_ascii = coding->head_ascii;
5165   Lisp_Object attrs;
5166
5167   detect_info->checked |= CATEGORY_MASK_CCL;
5168
5169   coding = &coding_categories[coding_category_ccl];
5170   valids = CODING_CCL_VALIDS (coding);
5171   attrs = CODING_ID_ATTRS (coding->id);
5172   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5173     src += head_ascii;
5174
5175   while (1)
5176     {
5177       int c;
5178
5179       src_base = src;
5180       ONE_MORE_BYTE (c);
5181       if (c < 0 || ! valids[c])
5182         break;
5183       if ((valids[c] > 1))
5184         found = CATEGORY_MASK_CCL;
5185     }
5186   detect_info->rejected |= CATEGORY_MASK_CCL;
5187   return 0;
5188
5189  no_more_source:
5190   detect_info->found |= found;
5191   return 1;
5192 }
5193
5194 static void
5195 decode_coding_ccl (struct coding_system *coding)
5196 {
5197   const unsigned char *src = coding->source + coding->consumed;
5198   const unsigned char *src_end = coding->source + coding->src_bytes;
5199   int *charbuf = coding->charbuf + coding->charbuf_used;
5200   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5201   int consumed_chars = 0;
5202   int multibytep = coding->src_multibyte;
5203   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5204   int source_charbuf[1024];
5205   int source_byteidx[1025];
5206   Lisp_Object attrs, charset_list;
5207
5208   CODING_GET_INFO (coding, attrs, charset_list);
5209
5210   while (1)
5211     {
5212       const unsigned char *p = src;
5213       int i = 0;
5214
5215       if (multibytep)
5216         {
5217           while (i < 1024 && p < src_end)
5218             {
5219               source_byteidx[i] = p - src;
5220               source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5221             }
5222           source_byteidx[i] = p - src;
5223         }
5224       else
5225         while (i < 1024 && p < src_end)
5226           source_charbuf[i++] = *p++;
5227
5228       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5229         ccl->last_block = 1;
5230       ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5231                   charset_list);
5232       charbuf += ccl->produced;
5233       if (multibytep)
5234         src += source_byteidx[ccl->consumed];
5235       else
5236         src += ccl->consumed;
5237       consumed_chars += ccl->consumed;
5238       if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5239         break;
5240     }
5241
5242   switch (ccl->status)
5243     {
5244     case CCL_STAT_SUSPEND_BY_SRC:
5245       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5246       break;
5247     case CCL_STAT_SUSPEND_BY_DST:
5248       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5249       break;
5250     case CCL_STAT_QUIT:
5251     case CCL_STAT_INVALID_CMD:
5252       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5253       break;
5254     default:
5255       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5256       break;
5257     }
5258   coding->consumed_char += consumed_chars;
5259   coding->consumed = src - coding->source;
5260   coding->charbuf_used = charbuf - coding->charbuf;
5261 }
5262
5263 static int
5264 encode_coding_ccl (struct coding_system *coding)
5265 {
5266   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5267   int multibytep = coding->dst_multibyte;
5268   int *charbuf = coding->charbuf;
5269   int *charbuf_end = charbuf + coding->charbuf_used;
5270   unsigned char *dst = coding->destination + coding->produced;
5271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5272   int destination_charbuf[1024];
5273   int i, produced_chars = 0;
5274   Lisp_Object attrs, charset_list;
5275
5276   CODING_GET_INFO (coding, attrs, charset_list);
5277   if (coding->consumed_char == coding->src_chars
5278       && coding->mode & CODING_MODE_LAST_BLOCK)
5279     ccl->last_block = 1;
5280
5281   while (charbuf < charbuf_end)
5282     {
5283       ccl_driver (ccl, charbuf, destination_charbuf,
5284                   charbuf_end - charbuf, 1024, charset_list);
5285       if (multibytep)
5286         {
5287           ASSURE_DESTINATION (ccl->produced * 2);
5288           for (i = 0; i < ccl->produced; i++)
5289             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5290         }
5291       else
5292         {
5293           ASSURE_DESTINATION (ccl->produced);
5294           for (i = 0; i < ccl->produced; i++)
5295             *dst++ = destination_charbuf[i] & 0xFF;
5296           produced_chars += ccl->produced;
5297         }
5298       charbuf += ccl->consumed;
5299       if (ccl->status == CCL_STAT_QUIT
5300           || ccl->status == CCL_STAT_INVALID_CMD)
5301         break;
5302     }
5303
5304   switch (ccl->status)
5305     {
5306     case CCL_STAT_SUSPEND_BY_SRC:
5307       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5308       break;
5309     case CCL_STAT_SUSPEND_BY_DST:
5310       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5311       break;
5312     case CCL_STAT_QUIT:
5313     case CCL_STAT_INVALID_CMD:
5314       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5315       break;
5316     default:
5317       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5318       break;
5319     }
5320
5321   coding->produced_char += produced_chars;
5322   coding->produced = dst - coding->destination;
5323   return 0;
5324 }
5325
5326
5327 \f
5328 /*** 10, 11. no-conversion handlers ***/
5329
5330 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
5331
5332 static void
5333 decode_coding_raw_text (struct coding_system *coding)
5334 {
5335   int eol_crlf =
5336     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5337
5338   coding->chars_at_source = 1;
5339   coding->consumed_char = coding->src_chars;
5340   coding->consumed = coding->src_bytes;
5341   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
5342     {
5343       coding->consumed_char--;
5344       coding->consumed--;
5345       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5346     }
5347   else
5348     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5349 }
5350
/* Encode the contents of CODING->charbuf as raw text.

   The CODING->charbuf_used elements of CODING->charbuf are written to
   CODING->destination, starting at offset CODING->produced.  How each
   element is written depends on CODING->dst_multibyte and
   CODING->src_multibyte; see the four loops below.

   On return the conversion result has been recorded as
   CODING_RESULT_SUCCESS, CODING->produced_char has been advanced by
   the local count PRODUCED_CHARS, and CODING->produced is the offset
   just past the last byte written.  The return value is always 0.

   NOTE(review): ASSURE_DESTINATION and the EMIT_* macros are defined
   outside this block.  They presumably refer to the locals DST,
   DST_END, MULTIBYTEP and PRODUCED_CHARS by name -- confirm against
   the macro definitions before renaming any of them.  */
5351 static int
5352 encode_coding_raw_text (struct coding_system *coding)
5353 {
5354   int multibytep = coding->dst_multibyte;
5355   int *charbuf = coding->charbuf;
5356   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5357   unsigned char *dst = coding->destination + coding->produced;
5358   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5359   int produced_chars = 0;
5360   int c;
5361
       /* Case 1: the destination is flagged multibyte.  Every byte goes
          through an EMIT_* macro.  */
5362   if (multibytep)
5363     {
5364       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5365
           /* Multibyte source: ASCII characters and byte8 characters are
              emitted as a single byte; any other character is first
              expanded into STR and then emitted byte by byte.  */
5366       if (coding->src_multibyte)
5367         while (charbuf < charbuf_end)
5368           {
5369             ASSURE_DESTINATION (safe_room);
5370             c = *charbuf++;
5371             if (ASCII_CHAR_P (c))
5372               EMIT_ONE_ASCII_BYTE (c);
5373             else if (CHAR_BYTE8_P (c))
5374               {
5375                 c = CHAR_TO_BYTE8 (c);
5376                 EMIT_ONE_BYTE (c);
5377               }
5378             else
5379               {
5380                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5381
5382                 CHAR_STRING_ADVANCE (c, p1);
5383                 while (p0 < p1)
5384                   {
                         /* The increment is kept out of the macro
                            argument.  NOTE(review): presumably because
                            EMIT_ONE_BYTE may evaluate its argument more
                            than once -- confirm.  */
5385                     EMIT_ONE_BYTE (*p0);
5386                     p0++;
5387                   }
5388               }
5389           }
5390       else
             /* Unibyte source: every element is emitted as one byte.  */
5391         while (charbuf < charbuf_end)
5392           {
5393             ASSURE_DESTINATION (safe_room);
5394             c = *charbuf++;
5395             EMIT_ONE_BYTE (c);
5396           }
5397     }
5398   else
5399     {
           /* Case 2: the destination is not multibyte.  Bytes are stored
              through DST directly, and PRODUCED_CHARS is derived from the
              number of bytes written once the loop is done.  */
5400       if (coding->src_multibyte)
5401         {
5402           int safe_room = MAX_MULTIBYTE_LENGTH;
5403
5404           while (charbuf < charbuf_end)
5405             {
5406               ASSURE_DESTINATION (safe_room);
5407               c = *charbuf++;
5408               if (ASCII_CHAR_P (c))
5409                 *dst++ = c;
5410               else if (CHAR_BYTE8_P (c))
5411                 *dst++ = CHAR_TO_BYTE8 (c);
5412               else
5413                 CHAR_STRING_ADVANCE (c, dst);
5414             }
5415         }
5416       else
5417         {
               /* Room for the whole remaining charbuf is requested in one
                  go; the copy loop is nevertheless also bounded by
                  DST_END.  */
5418           ASSURE_DESTINATION (charbuf_end - charbuf);
5419           while (charbuf < charbuf_end && dst < dst_end)
5420             *dst++ = *charbuf++;
5421         }
5422       produced_chars = dst - (coding->destination + coding->produced);
5423     }
5424   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5425   coding->produced_char += produced_chars;
5426   coding->produced = dst - coding->destination;
5427   return 0;
5428 }
5429
5430 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5431    Check if a text is encoded in a charset-based coding system.  If it
5432    is, return 1, else return 0.  */
5433
5434 static int
5435 detect_coding_charset (struct coding_system *coding,
5436                        struct coding_detection_info *detect_info)
5437 {
5438   const unsigned char *src = coding->source, *src_base;
5439   const unsigned char *src_end = coding->source + coding->src_bytes;
5440   int multibytep = coding->src_multibyte;
5441   int consumed_chars = 0;
5442   Lisp_Object attrs, valids, name;
5443   int found = 0;
5444   int head_ascii = coding->head_ascii;
5445   int check_latin_extra = 0;
5446
5447   detect_info->checked |= CATEGORY_MASK_CHARSET;
5448
5449   coding = &coding_categories[coding_category_charset];
5450   attrs = CODING_ID_ATTRS (coding->id);
5451   valids = AREF (attrs, coding_attr_charset_valids);
5452   name = CODING_ID_NAME (coding->id);
5453   if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5454                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5455       || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5456                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5457     check_latin_extra = 1;
5458
5459   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5460     src += head_ascii;
5461
5462   while (1)
5463     {
5464       int c;
5465       Lisp_Object val;
5466       struct charset *charset;
5467       int dim, idx;
5468
5469       src_base = src;
5470       ONE_MORE_BYTE (c);
5471       if (c < 0)
5472         continue;
5473       val = AREF (valids, c);
5474       if (NILP (val))
5475         break;
5476       if (c >= 0x80)
5477         {
5478           if (c < 0xA0
5479               && check_latin_extra
5480               && (!VECTORP (Vlatin_extra_code_table)
5481                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5482             break;
5483           found = CATEGORY_MASK_CHARSET;
5484         }
5485       if (INTEGERP (val))
5486         {
5487           charset = CHARSET_FROM_ID (XFASTINT (val));
5488           dim = CHARSET_DIMENSION (charset);
5489           for (idx = 1; idx < dim; idx++)
5490             {
5491               if (src == src_end)
5492                 goto too_short;
5493               ONE_MORE_BYTE (c);
5494               if (c < charset->code_space[(dim - 1 - idx) * 2]
5495                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5496                 break;
5497             }
5498           if (idx < dim)
5499             break;
5500         }
5501       else
5502         {
5503           idx = 1;
5504           for (; CONSP (val); val = XCDR (val))
5505             {
5506               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5507               dim = CHARSET_DIMENSION (charset);
5508               while (idx < dim)
5509                 {
5510                   if (src == src_end)
5511                     goto too_short;
5512                   ONE_MORE_BYTE (c);
5513                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5514                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5515                     break;
5516                   idx++;
5517                 }
5518               if (idx == dim)
5519                 {
5520                   val = Qnil;
5521                   break;
5522                 }
5523             }
5524           if (CONSP (val))
5525             break;
5526         }
5527     }
5528  too_short:
5529   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5530   return 0;
5531
5532  no_more_source:
5533   detect_info->found |= found;
5534   return 1;
5535 }
5536
/* Decode the bytes of CODING->source, from offset CODING->consumed up
   to CODING->src_bytes, according to a charset-based coding system,
   appending the resulting characters to CODING->charbuf.

   The leading byte of each character selects, through the
   `coding_attr_charset_valids' table of the coding system, either a
   single charset or a list of candidate charsets.  A byte sequence
   that cannot be decoded is rewound and its first byte is stored on
   its own, with CODING->errors incremented.

   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated.

   NOTE(review): ONE_MORE_BYTE, CODING_DECODE_CHAR and
   ADD_CHARSET_DATA are defined outside this block.  ONE_MORE_BYTE
   presumably uses the locals SRC, SRC_END, MULTIBYTEP and
   CONSUMED_CHARS and jumps to `no_more_source' when the input is
   exhausted -- confirm against the macro definitions.  */
5537 static void
5538 decode_coding_charset (struct coding_system *coding)
5539 {
5540   const unsigned char *src = coding->source + coding->consumed;
5541   const unsigned char *src_end = coding->source + coding->src_bytes;
5542   const unsigned char *src_base;
5543   int *charbuf = coding->charbuf + coding->charbuf_used;
5544   /* We may produce one charset annotation in one loop and one more at
5545      the end.  */
5546   int *charbuf_end
5547     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5548   int consumed_chars = 0, consumed_chars_base;
5549   int multibytep = coding->src_multibyte;
5550   Lisp_Object attrs, charset_list, valids;
5551   int char_offset = coding->produced_char;
5552   int last_offset = char_offset;
5553   int last_id = charset_ascii;
5554   int eol_crlf =
5555     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte read ahead after a CR when EOL_CRLF is set, or -1 if no
          byte is pending.  */
5556   int byte_after_cr = -1;
5557
5558   CODING_GET_INFO (coding, attrs, charset_list);
5559   valids = AREF (attrs, coding_attr_charset_valids);
5560
5561   while (1)
5562     {
5563       int c;
5564       Lisp_Object val;
5565       struct charset *charset;
5566       int dim;
5567       int len = 1;
5568       unsigned code;
5569
           /* Remember where this character starts so that it can be
              rewound (invalid_code) or reported as the consumed position
              (no_more_source).  */
5570       src_base = src;
5571       consumed_chars_base = consumed_chars;
5572
           /* Output buffer is full.  If a byte was read ahead after a CR,
              step SRC_BASE back by one so that byte is not reported as
              consumed.  */
5573       if (charbuf >= charbuf_end)
5574         {
5575           if (byte_after_cr >= 0)
5576             src_base--;
5577           break;
5578         }
5579
           /* Use the byte read ahead on the previous iteration if there
              is one; otherwise read a new byte and, when it is a CR under
              CRLF conversion, also read ahead the byte that follows.  */
5580       if (byte_after_cr >= 0)
5581         {
5582           c = byte_after_cr;
5583           byte_after_cr = -1;
5584         }
5585       else
5586         {
5587           ONE_MORE_BYTE (c);
5588           if (eol_crlf && c == '\r')
5589             ONE_MORE_BYTE (byte_after_cr);
5590         }
5591       if (c < 0)
5592         goto invalid_code;
5593       code = c;
5594
           /* VAL is a charset ID, a list of charset IDs, or anything else
              when C cannot start a character.  */
5595       val = AREF (valids, c);
5596       if (! INTEGERP (val) && ! CONSP (val))
5597         goto invalid_code;
5598       if (INTEGERP (val))
5599         {
5600           charset = CHARSET_FROM_ID (XFASTINT (val));
5601           dim = CHARSET_DIMENSION (charset);
               /* Accumulate the remaining bytes of a DIM-byte code, most
                  significant byte first.  */
5602           while (len < dim)
5603             {
5604               ONE_MORE_BYTE (c);
5605               code = (code << 8) | c;
5606               len++;
5607             }
               /* NOTE(review): CODING_DECODE_CHAR presumably stores the
                  decoded character in C, negative on failure -- the tests
                  on C below rely on that.  */
5608           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5609                               charset, code, c);
5610         }
5611       else
5612         {
5613           /* VAL is a list of charset IDs.  It is assured that the
5614              list is sorted by charset dimensions (smaller one
5615              comes first).  */
5616           while (CONSP (val))
5617             {
5618               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5619               dim = CHARSET_DIMENSION (charset);
                   /* LEN and CODE carry over between candidates, so only
                      the extra bytes needed by a larger dimension are
                      read.  */
5620               while (len < dim)
5621                 {
5622                   ONE_MORE_BYTE (c);
5623                   code = (code << 8) | c;
5624                   len++;
5625                 }
5626               CODING_DECODE_CHAR (coding, src, src_base,
5627                                   src_end, charset, code, c);
5628               if (c >= 0)
5629                 break;
5630               val = XCDR (val);
5631             }
5632         }
5633       if (c < 0)
5634         goto invalid_code;
           /* The charset changed to a non-ASCII one: close the previous
              run with ADD_CHARSET_DATA if that run was non-ASCII, then
              start a new run at the current offset.  */
5635       if (charset->id != charset_ascii
5636           && last_id != charset->id)
5637         {
5638           if (last_id != charset_ascii)
5639             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5640           last_id = charset->id;
5641           last_offset = char_offset;
5642         }
5643
5644       *charbuf++ = c;
5645       char_offset++;
5646       continue;
5647
           /* Rewind to the start of the failed sequence, re-read its
              first byte and store that alone: a negative value is stored
              negated, an ASCII byte as is, and any other byte through
              BYTE8_TO_CHAR.  */
5648     invalid_code:
5649       src = src_base;
5650       consumed_chars = consumed_chars_base;
5651       ONE_MORE_BYTE (c);
5652       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5653       char_offset++;
5654       coding->errors++;
5655     }
5656
       /* Reached by the `break' above and, presumably, from inside
          ONE_MORE_BYTE.  Only input up to SRC_BASE, i.e. the start of the
          character that was being read, is reported as consumed.  */
5657  no_more_source:
5658   if (last_id != charset_ascii)
5659     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5660   coding->consumed_char += consumed_chars_base;
5661   coding->consumed = src_base - coding->source;
5662   coding->charbuf_used = charbuf - coding->charbuf;
5663 }
5664
/* Encode the characters in CODING->charbuf according to a
   charset-based coding system, writing the bytes to
   CODING->destination starting at offset CODING->produced.

   Each character is looked up with char_charset in the charset list
   of the coding system; the resulting code is emitted as 1 to 4
   bytes according to the dimension of the charset found.  A character
   for which no charset is found is replaced by a single substitute
   byte (see below).

   On return the conversion result has been recorded as
   CODING_RESULT_SUCCESS, and CODING->produced_char and
   CODING->produced have been updated.  The return value is always 0.

   NOTE(review): ASSURE_DESTINATION and the EMIT_* macros are defined
   outside this block.  They presumably refer to the locals DST,
   DST_END, MULTIBYTEP and PRODUCED_CHARS by name -- confirm against
   the macro definitions before renaming any of them.  */
5665 static int
5666 encode_coding_charset (struct coding_system *coding)
5667 {
5668   int multibytep = coding->dst_multibyte;
5669   int *charbuf = coding->charbuf;
5670   int *charbuf_end = charbuf + coding->charbuf_used;
5671   unsigned char *dst = coding->destination + coding->produced;
5672   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5673   int safe_room = MAX_MULTIBYTE_LENGTH;
5674   int produced_chars = 0;
5675   Lisp_Object attrs, charset_list;
5676   int ascii_compatible;
5677   int c;
5678
5679   CODING_GET_INFO (coding, attrs, charset_list);
5680   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5681
5682   while (charbuf < charbuf_end)
5683     {
5684       struct charset *charset;
5685       unsigned code;
5686
5687       ASSURE_DESTINATION (safe_room);
5688       c = *charbuf++;
           /* ASCII characters bypass the charset lookup only when the
              coding system is ASCII compatible.  */
5689       if (ascii_compatible && ASCII_CHAR_P (c))
5690         EMIT_ONE_ASCII_BYTE (c);
5691       else if (CHAR_BYTE8_P (c))
5692         {
5693           c = CHAR_TO_BYTE8 (c);
5694           EMIT_ONE_BYTE (c);
5695         }
5696       else
5697         {
5698           charset = char_charset (c, charset_list, &code);
5699           if (charset)
5700             {
                   /* Emit CODE most significant byte first, using as
                      many bytes as the dimension of the charset.  */
5701               if (CHARSET_DIMENSION (charset) == 1)
5702                 EMIT_ONE_BYTE (code);
5703               else if (CHARSET_DIMENSION (charset) == 2)
5704                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5705               else if (CHARSET_DIMENSION (charset) == 3)
5706                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5707               else
5708                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5709                                  (code >> 8) & 0xFF, code & 0xFF);
5710             }
5711           else
5712             {
                   /* No charset in the list covers C: emit
                      CODING_INHIBIT_CHARACTER_SUBSTITUTION in safe
                      encoding mode, otherwise the default character of
                      the coding system.  */
5713               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5714                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5715               else
5716                 c = coding->default_char;
5717               EMIT_ONE_BYTE (c);
5718             }
5719         }
5720     }
5721
5722   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5723   coding->produced_char += produced_chars;
5724   coding->produced = dst - coding->destination;
5725   return 0;
5726 }
5727
5728 \f
5729 /*** 10. C library functions ***/
5730
5731 /* Setup coding context CODING from information about CODING_SYSTEM.
5732    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5733    CODING_SYSTEM is invalid, signal an error.  */
5734
5735 void
5736 setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5737 {
5738   Lisp_Object attrs;
5739   Lisp_Object eol_type;
5740   Lisp_Object coding_type;
5741   Lisp_Object val;
5742
5743   if (NILP (coding_system))
5744     coding_system = Qundecided;
5745
5746   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5747
5748   attrs = CODING_ID_ATTRS (coding->id);
5749   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5750
5751   coding->mode = 0;
5752   coding->head_ascii = -1;
5753   if (VECTORP (eol_type))
5754     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5755                             | CODING_REQUIRE_DETECTION_MASK);
5756   else if (! EQ (eol_type, Qunix))
5757     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5758                             | CODING_REQUIRE_ENCODING_MASK);
5759   else
5760     coding->common_flags = 0;
5761   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5762     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5763   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5764     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5765   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5766     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5767
5768   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5769   coding->max_charset_id = SCHARS (val) - 1;
5770   coding->safe_charsets = SDATA (val);
5771   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5772   coding->carryover_bytes = 0;
5773
5774   coding_type = CODING_ATTR_TYPE (attrs);
5775   if (EQ (coding_type, Qundecided))
5776     {
5777       coding->detector = NULL;
5778       coding->decoder = decode_coding_raw_text;
5779       coding->encoder = encode_coding_raw_text;
5780       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5781     }
5782   else if (EQ (coding_type, Qiso_2022))
5783     {
5784       int i;
5785       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5786
5787       /* Invoke graphic register 0 to plane 0.  */
5788       CODING_ISO_INVOCATION (coding, 0) = 0;
5789       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5790       CODING_ISO_INVOCATION (coding, 1)
5791         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5792       /* Setup the initial status of designation.  */
5793       for (i = 0; i < 4; i++)
5794         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5795       /* Not single shifting initially.  */
5796       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5797       /* Beginning of buffer should also be regarded as bol. */
5798       CODING_ISO_BOL (coding) = 1;
5799       coding->detector = detect_coding_iso_2022;
5800       coding->decoder = decode_coding_iso_2022;
5801       coding->encoder = encode_coding_iso_2022;
5802       if (flags & CODING_ISO_FLAG_SAFE)
5803         coding->mode |= CODING_MODE_SAFE_ENCODING;
5804       coding->common_flags
5805         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5806             | CODING_REQUIRE_FLUSHING_MASK);
5807       if (flags & CODING_ISO_FLAG_COMPOSITION)
5808         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5809       if (flags & CODING_ISO_FLAG_DESIGNATION)
5810         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5811       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5812         {
5813           setup_iso_safe_charsets (attrs);
5814           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5815           coding->max_charset_id = SCHARS (val) - 1;
5816           coding->safe_charsets = SDATA (val);
5817         }
5818       CODING_ISO_FLAGS (coding) = flags;
5819       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5820       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5821       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5822       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5823     }
5824   else if (EQ (coding_type, Qcharset))
5825     {
5826       coding->detector = detect_coding_charset;
5827       coding->decoder = decode_coding_charset;
5828       coding->encoder = encode_coding_charset;
5829       coding->common_flags
5830         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5831     }
5832   else if (EQ (coding_type, Qutf_8))
5833     {
5834       val = AREF (attrs, coding_attr_utf_bom);
5835       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5836                                    : EQ (val, Qt) ? utf_with_bom
5837                                    : utf_without_bom);
5838       coding->detector = detect_coding_utf_8;
5839       coding->decoder = decode_coding_utf_8;
5840       coding->encoder = encode_coding_utf_8;
5841       coding->common_flags
5842         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5843       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5844         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5845     }
5846   else if (EQ (coding_type, Qutf_16))
5847     {
5848       val = AREF (attrs, coding_attr_utf_bom);
5849       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5850                                     : EQ (val, Qt) ? utf_with_bom
5851                                     : utf_without_bom);
5852       val = AREF (attrs, coding_attr_utf_16_endian);
5853       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5854                                        : utf_16_little_endian);
5855       CODING_UTF_16_SURROGATE (coding) = 0;
5856       coding->detector = detect_coding_utf_16;
5857       coding->decoder = decode_coding_utf_16;
5858       coding->encoder = encode_coding_utf_16;
5859       coding->common_flags
5860         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5861       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5862         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5863     }
5864   else if (EQ (coding_type, Qccl))
5865     {
5866       coding->detector = detect_coding_ccl;
5867       coding->decoder = decode_coding_ccl;
5868       coding->encoder = encode_coding_ccl;
5869       coding->common_flags
5870         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5871             | CODING_REQUIRE_FLUSHING_MASK);
5872     }
5873   else if (EQ (coding_type, Qemacs_mule))
5874     {
5875       coding->detector = detect_coding_emacs_mule;
5876       coding->decoder = decode_coding_emacs_mule;
5877       coding->encoder = encode_coding_emacs_mule;
5878       coding->common_flags
5879         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5880       coding->spec.emacs_mule.full_support = 1;
5881       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5882           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5883         {
5884           Lisp_Object tail, safe_charsets;
5885           int max_charset_id = 0;
5886
5887           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5888                tail = XCDR (tail))
5889             if (max_charset_id < XFASTINT (XCAR (tail)))
5890               max_charset_id = XFASTINT (XCAR (tail));
5891           safe_charsets = make_uninit_string (max_charset_id + 1);
5892           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5893           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5894                tail = XCDR (tail))
5895             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5896           coding->max_charset_id = max_charset_id;
5897           coding->safe_charsets = SDATA (safe_charsets);
5898           coding->spec.emacs_mule.full_support = 1;
5899         }
5900       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5901       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5902     }
5903   else if (EQ (coding_type, Qshift_jis))
5904     {
5905       coding->detector = detect_coding_sjis;
5906       coding->decoder = decode_coding_sjis;
5907       coding->encoder = encode_coding_sjis;
5908       coding->common_flags
5909         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5910     }
5911   else if (EQ (coding_type, Qbig5))
5912     {
5913       coding->detector = detect_coding_big5;
5914       coding->decoder = decode_coding_big5;
5915       coding->encoder = encode_coding_big5;
5916       coding->common_flags
5917         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5918     }
5919   else                          /* EQ (coding_type, Qraw_text) */
5920     {
5921       coding->detector = NULL;
5922       coding->decoder = decode_coding_raw_text;
5923       coding->encoder = encode_coding_raw_text;
5924       if (! EQ (eol_type, Qunix))
5925         {
5926           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5927           if (! VECTORP (eol_type))
5928             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5929         }
5930
5931     }
5932
5933   return;
5934 }
5935
5936 /* Return a list of charsets supported by CODING.  */
5937
5938 Lisp_Object
5939 coding_charset_list (struct coding_system *coding)
5940 {
5941   Lisp_Object attrs, charset_list;
5942
5943   CODING_GET_INFO (coding, attrs, charset_list);
5944   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5945     {
5946       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5947
5948       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5949         charset_list = Viso_2022_charset_list;
5950     }
5951   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5952     {
5953       charset_list = Vemacs_mule_charset_list;
5954     }
5955   return charset_list;
5956 }
5957
5958
5959 /* Return a list of charsets supported by CODING-SYSTEM.  */
5960
5961 Lisp_Object
5962 coding_system_charset_list (Lisp_Object coding_system)
5963 {
5964   int id;
5965   Lisp_Object attrs, charset_list;
5966
5967   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5968   attrs = CODING_ID_ATTRS (id);
5969
5970   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5971     {
5972       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5973
5974       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5975         charset_list = Viso_2022_charset_list;
5976       else
5977         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5978     }
5979   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5980     {
5981       charset_list = Vemacs_mule_charset_list;
5982     }
5983   else
5984     {
5985       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5986     }
5987   return charset_list;
5988 }
5989
5990
5991 /* Return raw-text or one of its subsidiaries that has the same
5992    eol_type as CODING-SYSTEM.  */
5993
5994 Lisp_Object
5995 raw_text_coding_system (Lisp_Object coding_system)
5996 {
5997   Lisp_Object spec, attrs;
5998   Lisp_Object eol_type, raw_text_eol_type;
5999
6000   if (NILP (coding_system))
6001     return Qraw_text;
6002   spec = CODING_SYSTEM_SPEC (coding_system);
6003   attrs = AREF (spec, 0);
6004
6005   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6006     return coding_system;
6007
6008   eol_type = AREF (spec, 2);
6009   if (VECTORP (eol_type))
6010     return Qraw_text;
6011   spec = CODING_SYSTEM_SPEC (Qraw_text);
6012   raw_text_eol_type = AREF (spec, 2);
6013   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6014           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6015           : AREF (raw_text_eol_type, 2));
6016 }
6017
6018
6019 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6020    does, return one of the subsidiary that has the same eol-spec as
6021    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
6022    inherit end-of-line format from the system's setting
6023    (system_eol_type).  */
6024
6025 Lisp_Object
6026 coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
6027 {
6028   Lisp_Object spec, eol_type;
6029
6030   if (NILP (coding_system))
6031     coding_system = Qraw_text;
6032   spec = CODING_SYSTEM_SPEC (coding_system);
6033   eol_type = AREF (spec, 2);
6034   if (VECTORP (eol_type))
6035     {
6036       Lisp_Object parent_eol_type;
6037
6038       if (! NILP (parent))
6039         {
6040           Lisp_Object parent_spec;
6041
6042           parent_spec = CODING_SYSTEM_SPEC (parent);
6043           parent_eol_type = AREF (parent_spec, 2);
6044         }
6045       else
6046         parent_eol_type = system_eol_type;
6047       if (EQ (parent_eol_type, Qunix))
6048         coding_system = AREF (eol_type, 0);
6049       else if (EQ (parent_eol_type, Qdos))
6050         coding_system = AREF (eol_type, 1);
6051       else if (EQ (parent_eol_type, Qmac))
6052         coding_system = AREF (eol_type, 2);
6053     }
6054   return coding_system;
6055 }
6056
6057 /* Emacs has a mechanism to automatically detect a coding system if it
6058    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
6059    it's impossible to distinguish some coding systems accurately
6060    because they use the same range of codes.  So, at first, coding
6061    systems are categorized into the following categories:
6062
6063    o coding-category-emacs-mule
6064
6065         The category for a coding system which has the same code range
6066         as Emacs' internal format.  Assigned the coding-system (Lisp
6067         symbol) `emacs-mule' by default.
6068
6069    o coding-category-sjis
6070
6071         The category for a coding system which has the same code range
6072         as SJIS.  Assigned the coding-system (Lisp
6073         symbol) `japanese-shift-jis' by default.
6074
6075    o coding-category-iso-7
6076
6077         The category for a coding system which has the same code range
6078         as ISO2022 of 7-bit environment.  This doesn't use any locking
6079         shift and single shift functions.  This can encode/decode all
6080         charsets.  Assigned the coding-system (Lisp symbol)
6081         `iso-2022-7bit' by default.
6082
6083    o coding-category-iso-7-tight
6084
6085         Same as coding-category-iso-7 except that this can
6086         encode/decode only the specified charsets.
6087
6088    o coding-category-iso-8-1
6089
6090         The category for a coding system which has the same code range
6091         as ISO2022 of 8-bit environment and graphic plane 1 used only
6092         for DIMENSION1 charset.  This doesn't use any locking shift
6093         and single shift functions.  Assigned the coding-system (Lisp
6094         symbol) `iso-latin-1' by default.
6095
6096    o coding-category-iso-8-2
6097
6098         The category for a coding system which has the same code range
6099         as ISO2022 of 8-bit environment and graphic plane 1 used only
6100         for DIMENSION2 charset.  This doesn't use any locking shift
6101         and single shift functions.  Assigned the coding-system (Lisp
6102         symbol) `japanese-iso-8bit' by default.
6103
6104    o coding-category-iso-7-else
6105
6106         The category for a coding system which has the same code range
6107         as ISO2022 of 7-bit environment but uses locking shift or
6108         single shift functions.  Assigned the coding-system (Lisp
6109         symbol) `iso-2022-7bit-lock' by default.
6110
6111    o coding-category-iso-8-else
6112
6113         The category for a coding system which has the same code range
6114         as ISO2022 of 8-bit environment but uses locking shift or
6115         single shift functions.  Assigned the coding-system (Lisp
6116         symbol) `iso-2022-8bit-ss2' by default.
6117
6118    o coding-category-big5
6119
6120         The category for a coding system which has the same code range
6121         as BIG5.  Assigned the coding-system (Lisp symbol)
6122         `cn-big5' by default.
6123
6124    o coding-category-utf-8
6125
6126         The category for a coding system which has the same code range
6127         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6128         symbol) `utf-8' by default.
6129
6130    o coding-category-utf-16-be
6131
6132         The category for a coding system in which a text has an
6133         Unicode signature (cf. Unicode Standard) in the order of BIG
6134         endian at the head.  Assigned the coding-system (Lisp symbol)
6135         `utf-16-be' by default.
6136
6137    o coding-category-utf-16-le
6138
6139         The category for a coding system in which a text has an
6140         Unicode signature (cf. Unicode Standard) in the order of
6141         LITTLE endian at the head.  Assigned the coding-system (Lisp
6142         symbol) `utf-16-le' by default.
6143
6144    o coding-category-ccl
6145
6146         The category for a coding system of which encoder/decoder is
6147         written in CCL programs.  The default value is nil, i.e., no
6148         coding system is assigned.
6149
6150    o coding-category-binary
6151
6152         The category for a coding system not categorized in any of the
6153         above.  Assigned the coding-system (Lisp symbol)
6154         `no-conversion' by default.
6155
6156    Each of them is a Lisp symbol and the value is an actual
6157    `coding-system's (this is also a Lisp symbol) assigned by a user.
6158    What Emacs does actually is to detect a category of coding system.
6159    Then, it uses a `coding-system' assigned to it.  If Emacs can't
6160    decide only one possible category, it selects a category of the
6161    highest priority.  Priorities of categories are also specified by a
6162    user in a Lisp variable `coding-category-list'.
6163
6164 */
6165
/* End-of-line formats reported by detect_eol.  The non-zero values
   are distinct bits, and adjust_coding_eol_type tests them with `&'.  */
6166 #define EOL_SEEN_NONE   0
6167 #define EOL_SEEN_LF     1
6168 #define EOL_SEEN_CR     2
6169 #define EOL_SEEN_CRLF   4
6170
6171 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6172    SOURCE is encoded.  If CATEGORY is one of
6173    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6174    two-byte, else they are encoded by one-byte.
6175
6176    Return one of EOL_SEEN_XXX.  */
6177
6178 #define MAX_EOL_CHECK_COUNT 3
6179
6180 static int
6181 detect_eol (const unsigned char *source, EMACS_INT src_bytes,
6182             enum coding_category category)
6183 {
6184   const unsigned char *src = source, *src_end = src + src_bytes;
6185   unsigned char c;
6186   int total  = 0;
6187   int eol_seen = EOL_SEEN_NONE;
6188
6189   if ((1 << category) & CATEGORY_MASK_UTF_16)
6190     {
6191       int msb, lsb;
6192
6193       msb = category == (coding_category_utf_16_le
6194                          | coding_category_utf_16_le_nosig);
6195       lsb = 1 - msb;
6196
6197       while (src + 1 < src_end)
6198         {
6199           c = src[lsb];
6200           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6201             {
6202               int this_eol;
6203
6204               if (c == '\n')
6205                 this_eol = EOL_SEEN_LF;
6206               else if (src + 3 >= src_end
6207                        || src[msb + 2] != 0
6208                        || src[lsb + 2] != '\n')
6209                 this_eol = EOL_SEEN_CR;
6210               else
6211                 {
6212                   this_eol = EOL_SEEN_CRLF;
6213                   src += 2;
6214                 }
6215
6216               if (eol_seen == EOL_SEEN_NONE)
6217                 /* This is the first end-of-line.  */
6218                 eol_seen = this_eol;
6219               else if (eol_seen != this_eol)
6220                 {
6221                   /* The found type is different from what found before.
6222                      Allow for stray ^M characters in DOS EOL files.  */
6223                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6224                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6225                     eol_seen = EOL_SEEN_CRLF;
6226                   else
6227                     {
6228                       eol_seen = EOL_SEEN_LF;
6229                       break;
6230                     }
6231                 }
6232               if (++total == MAX_EOL_CHECK_COUNT)
6233                 break;
6234             }
6235           src += 2;
6236         }
6237     }
6238   else
6239     {
6240       while (src < src_end)
6241         {
6242           c = *src++;
6243           if (c == '\n' || c == '\r')
6244             {
6245               int this_eol;
6246
6247               if (c == '\n')
6248                 this_eol = EOL_SEEN_LF;
6249               else if (src >= src_end || *src != '\n')
6250                 this_eol = EOL_SEEN_CR;
6251               else
6252                 this_eol = EOL_SEEN_CRLF, src++;
6253
6254               if (eol_seen == EOL_SEEN_NONE)
6255                 /* This is the first end-of-line.  */
6256                 eol_seen = this_eol;
6257               else if (eol_seen != this_eol)
6258                 {
6259                   /* The found type is different from what found before.
6260                      Allow for stray ^M characters in DOS EOL files.  */
6261                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6262                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6263                     eol_seen = EOL_SEEN_CRLF;
6264                   else
6265                     {
6266                       eol_seen = EOL_SEEN_LF;
6267                       break;
6268                     }
6269                 }
6270               if (++total == MAX_EOL_CHECK_COUNT)
6271                 break;
6272             }
6273         }
6274     }
6275   return eol_seen;
6276 }
6277
6278
6279 static Lisp_Object
6280 adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6281 {
6282   Lisp_Object eol_type;
6283
6284   eol_type = CODING_ID_EOL_TYPE (coding->id);
6285   if (eol_seen & EOL_SEEN_LF)
6286     {
6287       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6288       eol_type = Qunix;
6289     }
6290   else if (eol_seen & EOL_SEEN_CRLF)
6291     {
6292       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6293       eol_type = Qdos;
6294     }
6295   else if (eol_seen & EOL_SEEN_CR)
6296     {
6297       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6298       eol_type = Qmac;
6299     }
6300   return eol_type;
6301 }
6302
6303 /* Detect how a text specified in CODING is encoded.  If a coding
6304    system is detected, update fields of CODING by the detected coding
6305    system.  */
6306
void
detect_coding (struct coding_system *coding)
{
  const unsigned char *src, *src_end;
  /* CODING->mode is restored from this copy just before returning.  */
  int saved_mode = coding->mode;

  coding->consumed = coding->consumed_char = 0;
  coding->produced = coding->produced_char = 0;
  coding_set_source (coding);

  src_end = coding->source + coding->src_bytes;
  coding->head_ascii = 0;

  /* If we have not yet decided the text encoding type, detect it
     now.  */
  if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
    {
      int c, i;
      struct coding_detection_info detect_info;
      int null_byte_found = 0, eight_bit_found = 0;

      detect_info.checked = detect_info.found = detect_info.rejected = 0;
      /* First pass: scan the source bytes, counting in
         CODING->head_ascii the bytes that precede the first 8-bit
         byte, and noting whether a null byte or an 8-bit byte occurs.
         The scan stops early once both kinds have been seen, or when
         the ISO-2022 detector reports a result.  */
      for (src = coding->source; src < src_end; src++)
        {
          c = *src;
          if (c & 0x80)
            {
              eight_bit_found = 1;
              if (null_byte_found)
                break;
            }
          else if (c < 0x20)
            {
              /* The ISO-2022 detector is tried at most once (it is
                 skipped as soon as any category has been marked as
                 checked), and only on ESC, SI or SO.  */
              if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
                  && ! inhibit_iso_escape_detection
                  && ! detect_info.checked)
                {
                  if (detect_coding_iso_2022 (coding, &detect_info))
                    {
                      /* We have scanned the whole data.  */
                      if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
                        {
                          /* We didn't find an 8-bit code.  We may
                             have found a null-byte, but it's very
                             rare that a binary file conforms to
                             ISO-2022.  */
                          src = src_end;
                          coding->head_ascii = src - coding->source;
                        }
                      /* Only the ISO escape categories remain
                         candidates.  */
                      detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
                      break;
                    }
                }
              else if (! c && !inhibit_null_byte_detection)
                {
                  null_byte_found = 1;
                  if (eight_bit_found)
                    break;
                }
              if (! eight_bit_found)
                coding->head_ascii++;
            }
          else if (! eight_bit_found)
            coding->head_ascii++;
        }

      /* Second pass: run the per-category detectors only if the scan
         above found something other than plain 7-bit text, or the
         ISO-2022 detector already found a category.  */
      if (null_byte_found || eight_bit_found
          || coding->head_ascii < coding->src_bytes
          || detect_info.found)
        {
          enum coding_category category;
          struct coding_system *this;

          if (coding->head_ascii == coding->src_bytes)
            /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
            for (i = 0; i < coding_category_raw_text; i++)
              {
                category = coding_priorities[i];
                this = coding_categories + category;
                if (detect_info.found & (1 << category))
                  break;
              }
          else
            {
              if (null_byte_found)
                {
                  /* With a null byte present, treat every category
                     except the UTF-16 ones as already checked and
                     rejected.  */
                  detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                  detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                }
              /* Try the categories in priority order and stop at the
                 first one that is found.  */
              for (i = 0; i < coding_category_raw_text; i++)
                {
                  category = coding_priorities[i];
                  this = coding_categories + category;
                  if (this->id < 0)
                    {
                      /* No coding system of this category is defined.  */
                      detect_info.rejected |= (1 << category);
                    }
                  else if (category >= coding_category_raw_text)
                    continue;
                  else if (detect_info.checked & (1 << category))
                    {
                      if (detect_info.found & (1 << category))
                        break;
                    }
                  else if ((*(this->detector)) (coding, &detect_info)
                           && detect_info.found & (1 << category))
                    {
                      /* NOTE(review): CATEGORY is narrowed to a
                         specific endianness here, but the setup below
                         goes through THIS, which still points at the
                         entry selected at the top of this iteration --
                         confirm this is intended.  */
                      if (category == coding_category_utf_16_auto)
                        {
                          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
                            category = coding_category_utf_16_le;
                          else
                            category = coding_category_utf_16_be;
                        }
                      break;
                    }
                }
            }

          /* Install the result.  I is below coding_category_raw_text
             exactly when one of the loops above stopped on a found
             category.  Otherwise fall back to no-conversion (null
             byte seen), raw-text (every category rejected), or the
             highest-priority category that was not rejected.  */
          if (i < coding_category_raw_text)
            setup_coding_system (CODING_ID_NAME (this->id), coding);
          else if (null_byte_found)
            setup_coding_system (Qno_conversion, coding);
          else if ((detect_info.rejected & CATEGORY_MASK_ANY)
                   == CATEGORY_MASK_ANY)
            setup_coding_system (Qraw_text, coding);
          else if (detect_info.rejected)
            for (i = 0; i < coding_category_raw_text; i++)
              if (! (detect_info.rejected & (1 << coding_priorities[i])))
                {
                  this = coding_categories + coding_priorities[i];
                  setup_coding_system (CODING_ID_NAME (this->id), coding);
                  break;
                }
        }
    }
  else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
           == coding_category_utf_8_auto)
    {
      /* The utf_bom attribute is used as a cons here: its car is
         installed when the UTF-8 detector reports the "sig" category,
         its cdr otherwise.  */
      Lisp_Object coding_systems;
      struct coding_detection_info detect_info;

      coding_systems
        = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
      /* NOTE(review): detect_info.checked is not initialized in this
         branch -- confirm the detector does not depend on it.  */
      detect_info.found = detect_info.rejected = 0;
      coding->head_ascii = 0;
      if (CONSP (coding_systems)
          && detect_coding_utf_8 (coding, &detect_info))
        {
          if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
            setup_coding_system (XCAR (coding_systems), coding);
          else
            setup_coding_system (XCDR (coding_systems), coding);
        }
    }
  else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
           == coding_category_utf_16_auto)
    {
      /* Same scheme as above for UTF-16: the car is installed for
         little endian, the cdr for big endian.  If the detector finds
         neither, CODING is left unchanged.  */
      Lisp_Object coding_systems;
      struct coding_detection_info detect_info;

      coding_systems
        = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
      detect_info.found = detect_info.rejected = 0;
      coding->head_ascii = 0;
      if (CONSP (coding_systems)
          && detect_coding_utf_16 (coding, &detect_info))
        {
          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
            setup_coding_system (XCAR (coding_systems), coding);
          else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
            setup_coding_system (XCDR (coding_systems), coding);
        }
    }
  coding->mode = saved_mode;
}
6484
6485
6486 static void
6487 decode_eol (struct coding_system *coding)
6488 {
6489   Lisp_Object eol_type;
6490   unsigned char *p, *pbeg, *pend;
6491
6492   eol_type = CODING_ID_EOL_TYPE (coding->id);
6493   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6494     return;
6495
6496   if (NILP (coding->dst_object))
6497     pbeg = coding->destination;
6498   else
6499     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6500   pend = pbeg + coding->produced;
6501
6502   if (VECTORP (eol_type))
6503     {
6504       int eol_seen = EOL_SEEN_NONE;
6505
6506       for (p = pbeg; p < pend; p++)
6507         {
6508           if (*p == '\n')
6509             eol_seen |= EOL_SEEN_LF;
6510           else if (*p == '\r')
6511             {
6512               if (p + 1 < pend && *(p + 1) == '\n')
6513                 {
6514                   eol_seen |= EOL_SEEN_CRLF;
6515                   p++;
6516                 }
6517               else
6518                 eol_seen |= EOL_SEEN_CR;
6519             }
6520         }
6521       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6522       if ((eol_seen & EOL_SEEN_CRLF) != 0
6523           && (eol_seen & EOL_SEEN_CR) != 0
6524           && (eol_seen & EOL_SEEN_LF) == 0)
6525         eol_seen = EOL_SEEN_CRLF;
6526       else if (eol_seen != EOL_SEEN_NONE
6527           && eol_seen != EOL_SEEN_LF
6528           && eol_seen != EOL_SEEN_CRLF
6529           && eol_seen != EOL_SEEN_CR)
6530         eol_seen = EOL_SEEN_LF;
6531       if (eol_seen != EOL_SEEN_NONE)
6532         eol_type = adjust_coding_eol_type (coding, eol_seen);
6533     }
6534
6535   if (EQ (eol_type, Qmac))
6536     {
6537       for (p = pbeg; p < pend; p++)
6538         if (*p == '\r')
6539           *p = '\n';
6540     }
6541   else if (EQ (eol_type, Qdos))
6542     {
6543       int n = 0;
6544
6545       if (NILP (coding->dst_object))
6546         {
6547           /* Start deleting '\r' from the tail to minimize the memory
6548              movement.  */
6549           for (p = pend - 2; p >= pbeg; p--)
6550             if (*p == '\r')
6551               {
6552                 memmove (p, p + 1, pend-- - p - 1);
6553                 n++;
6554               }
6555         }
6556       else
6557         {
6558           int pos_byte = coding->dst_pos_byte;
6559           int pos = coding->dst_pos;
6560           int pos_end = pos + coding->produced_char - 1;
6561
6562           while (pos < pos_end)
6563             {
6564               p = BYTE_POS_ADDR (pos_byte);
6565               if (*p == '\r' && p[1] == '\n')
6566                 {
6567                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6568                   n++;
6569                   pos_end--;
6570                 }
6571               pos++;
6572               if (coding->dst_multibyte)
6573                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6574               else
6575                 pos_byte++;
6576             }
6577         }
6578       coding->produced -= n;
6579       coding->produced_char -= n;
6580     }
6581 }
6582
6583
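/* Return a translation table (or list of them) from coding system
   attribute vector ATTRS for encoding (ENCODEP is nonzero) or
   decoding (ENCODEP is zero).  If MAX_LOOKUP is non-NULL, store
   there 0 when character translation is disabled, and otherwise the
   largest natural number found in the second extra slot of the
   returned char-table(s), but at least 1.  */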
6587
6588 static Lisp_Object
6589 get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6590 {
6591   Lisp_Object standard, translation_table;
6592   Lisp_Object val;
6593
6594   if (NILP (Venable_character_translation))
6595     {
6596       if (max_lookup)
6597         *max_lookup = 0;
6598       return Qnil;
6599     }
6600   if (encodep)
6601     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6602       standard = Vstandard_translation_table_for_encode;
6603   else
6604     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6605       standard = Vstandard_translation_table_for_decode;
6606   if (NILP (translation_table))
6607     translation_table = standard;
6608   else
6609     {
6610       if (SYMBOLP (translation_table))
6611         translation_table = Fget (translation_table, Qtranslation_table);
6612       else if (CONSP (translation_table))
6613         {
6614           translation_table = Fcopy_sequence (translation_table);
6615           for (val = translation_table; CONSP (val); val = XCDR (val))
6616             if (SYMBOLP (XCAR (val)))
6617               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6618         }
6619       if (CHAR_TABLE_P (standard))
6620         {
6621           if (CONSP (translation_table))
6622             translation_table = nconc2 (translation_table,
6623                                         Fcons (standard, Qnil));
6624           else
6625             translation_table = Fcons (translation_table,
6626                                        Fcons (standard, Qnil));
6627         }
6628     }
6629
6630   if (max_lookup)
6631     {
6632       *max_lookup = 1;
6633       if (CHAR_TABLE_P (translation_table)
6634           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6635         {
6636           val = XCHAR_TABLE (translation_table)->extras[1];
6637           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6638             *max_lookup = XFASTINT (val);
6639         }
6640       else if (CONSP (translation_table))
6641         {
6642           Lisp_Object tail, val;
6643
6644           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6645             if (CHAR_TABLE_P (XCAR (tail))
6646                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6647               {
6648                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6649                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6650                   *max_lookup = XFASTINT (val);
6651               }
6652         }
6653     }
6654   return translation_table;
6655 }
6656
/* Look up character C in TABLE, which is a char-table or a list whose
   char-table elements are consulted in order.

   TRANS is first set to Qnil.  Whenever a table maps C to a
   character, C is replaced by that character and TRANS is reset to
   Qnil; for a list the lookup then continues with the next table,
   using the new C.  When a table yields a non-nil value that is not a
   character, TRANS is left holding that value (for a list, the lookup
   stops there).  List elements that are not char-tables are skipped.

   C and TRANS must be lvalues, and every argument may be evaluated
   more than once.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
  do {                                                          \
    trans = Qnil;                                               \
    if (CHAR_TABLE_P (table))                                   \
      {                                                         \
        trans = CHAR_TABLE_REF (table, c);                      \
        if (CHARACTERP (trans))                                 \
          c = XFASTINT (trans), trans = Qnil;                   \
      }                                                         \
    else if (CONSP (table))                                     \
      {                                                         \
        Lisp_Object tail;                                       \
                                                                \
        for (tail = table; CONSP (tail); tail = XCDR (tail))    \
          if (CHAR_TABLE_P (XCAR (tail)))                       \
            {                                                   \
              trans = CHAR_TABLE_REF (XCAR (tail), c);          \
              if (CHARACTERP (trans))                           \
                c = XFASTINT (trans), trans = Qnil;             \
              else if (! NILP (trans))                          \
                break;                                          \
            }                                                   \
      }                                                         \
  } while (0)
6681
6682
6683 /* Return a translation of character(s) at BUF according to TRANS.
6684    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6685    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6686    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
   translation is found, and Qnil if not found.
6688    If BUF is too short to lookup characters in FROM, return Qt.  */
6689
6690 static Lisp_Object
6691 get_translation (Lisp_Object trans, int *buf, int *buf_end)
6692 {
6693
6694   if (INTEGERP (trans))
6695     return trans;
6696   for (; CONSP (trans); trans = XCDR (trans))
6697     {
6698       Lisp_Object val = XCAR (trans);
6699       Lisp_Object from = XCAR (val);
6700       int len = ASIZE (from);
6701       int i;
6702
6703       for (i = 0; i < len; i++)
6704         {
6705           if (buf + i == buf_end)
6706             return Qt;
6707           if (XINT (AREF (from, i)) != buf[i])
6708             break;
6709         }
6710       if (i == len)
6711         return val;
6712     }
6713   return Qnil;
6714 }
6715
6716
6717 static int
6718 produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6719                int last_block)
6720 {
6721   unsigned char *dst = coding->destination + coding->produced;
6722   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6723   EMACS_INT produced;
6724   EMACS_INT produced_chars = 0;
6725   int carryover = 0;
6726
6727   if (! coding->chars_at_source)
6728     {
6729       /* Source characters are in coding->charbuf.  */
6730       int *buf = coding->charbuf;
6731       int *buf_end = buf + coding->charbuf_used;
6732
6733       if (EQ (coding->src_object, coding->dst_object))
6734         {
6735           coding_set_source (coding);
6736           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6737         }
6738
6739       while (buf < buf_end)
6740         {
6741           int c = *buf, i;
6742
6743           if (c >= 0)
6744             {
6745               int from_nchars = 1, to_nchars = 1;
6746               Lisp_Object trans = Qnil;
6747
6748               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6749               if (! NILP (trans))
6750                 {
6751                   trans = get_translation (trans, buf, buf_end);
6752                   if (INTEGERP (trans))
6753                     c = XINT (trans);
6754                   else if (CONSP (trans))
6755                     {
6756                       from_nchars = ASIZE (XCAR (trans));
6757                       trans = XCDR (trans);
6758                       if (INTEGERP (trans))
6759                         c = XINT (trans);
6760                       else
6761                         {
6762                           to_nchars = ASIZE (trans);
6763                           c = XINT (AREF (trans, 0));
6764                         }
6765                     }
6766                   else if (EQ (trans, Qt) && ! last_block)
6767                     break;
6768                 }
6769
6770               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6771                 {
6772                   dst = alloc_destination (coding,
6773                                            buf_end - buf
6774                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6775                                            dst);
6776                   if (EQ (coding->src_object, coding->dst_object))
6777                     {
6778                       coding_set_source (coding);
6779                       dst_end = (((unsigned char *) coding->source)
6780                                  + coding->consumed);
6781                     }
6782                   else
6783                     dst_end = coding->destination + coding->dst_bytes;
6784                 }
6785
6786               for (i = 0; i < to_nchars; i++)
6787                 {
6788                   if (i > 0)
6789                     c = XINT (AREF (trans, i));
6790                   if (coding->dst_multibyte
6791                       || ! CHAR_BYTE8_P (c))
6792                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6793                   else
6794                     *dst++ = CHAR_TO_BYTE8 (c);
6795                 }
6796               produced_chars += to_nchars;
6797               buf += from_nchars;
6798             }
6799           else
6800             /* This is an annotation datum.  (-C) is the length.  */
6801             buf += -c;
6802         }
6803       carryover = buf_end - buf;
6804     }
6805   else
6806     {
6807       /* Source characters are at coding->source.  */
6808       const unsigned char *src = coding->source;
6809       const unsigned char *src_end = src + coding->consumed;
6810
6811       if (EQ (coding->dst_object, coding->src_object))
6812         dst_end = (unsigned char *) src;
6813       if (coding->src_multibyte != coding->dst_multibyte)
6814         {
6815           if (coding->src_multibyte)
6816             {
6817               int multibytep = 1;
6818               EMACS_INT consumed_chars = 0;
6819
6820               while (1)
6821                 {
6822                   const unsigned char *src_base = src;
6823                   int c;
6824
6825                   ONE_MORE_BYTE (c);
6826                   if (dst == dst_end)
6827                     {
6828                       if (EQ (coding->src_object, coding->dst_object))
6829                         dst_end = (unsigned char *) src;
6830                       if (dst == dst_end)
6831                         {
6832                           EMACS_INT offset = src - coding->source;
6833
6834                           dst = alloc_destination (coding, src_end - src + 1,
6835                                                    dst);
6836                           dst_end = coding->destination + coding->dst_bytes;
6837                           coding_set_source (coding);
6838                           src = coding->source + offset;
6839                           src_end = coding->source + coding->src_bytes;
6840                           if (EQ (coding->src_object, coding->dst_object))
6841                             dst_end = (unsigned char *) src;
6842                         }
6843                     }
6844                   *dst++ = c;
6845                   produced_chars++;
6846                 }
6847             no_more_source:
6848               ;
6849             }
6850           else
6851             while (src < src_end)
6852               {
6853                 int multibytep = 1;
6854                 int c = *src++;
6855
6856                 if (dst >= dst_end - 1)
6857                   {
6858                     if (EQ (coding->src_object, coding->dst_object))
6859                       dst_end = (unsigned char *) src;
6860                     if (dst >= dst_end - 1)
6861                       {
6862                         EMACS_INT offset = src - coding->source;
6863                         EMACS_INT more_bytes;
6864
6865                         if (EQ (coding->src_object, coding->dst_object))
6866                           more_bytes = ((src_end - src) / 2) + 2;
6867                         else
6868                           more_bytes = src_end - src + 2;
6869                         dst = alloc_destination (coding, more_bytes, dst);
6870                         dst_end = coding->destination + coding->dst_bytes;
6871                         coding_set_source (coding);
6872                         src = coding->source + offset;
6873                         src_end = coding->source + coding->src_bytes;
6874                         if (EQ (coding->src_object, coding->dst_object))
6875                           dst_end = (unsigned char *) src;
6876                       }
6877                   }
6878                 EMIT_ONE_BYTE (c);
6879               }
6880         }
6881       else
6882         {
6883           if (!EQ (coding->src_object, coding->dst_object))
6884             {
6885               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6886
6887               if (require > 0)
6888                 {
6889                   EMACS_INT offset = src - coding->source;
6890
6891                   dst = alloc_destination (coding, require, dst);
6892                   coding_set_source (coding);
6893                   src = coding->source + offset;
6894                   src_end = coding->source + coding->src_bytes;
6895                 }
6896             }
6897           produced_chars = coding->consumed_char;
6898           while (src < src_end)
6899             *dst++ = *src++;
6900         }
6901     }
6902
6903   produced = dst - (coding->destination + coding->produced);
6904   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6905     insert_from_gap (produced_chars, produced);
6906   coding->produced += produced;
6907   coding->produced_char += produced_chars;
6908   return carryover;
6909 }
6910
6911 /* Compose text in CODING->object according to the annotation data at
6912    CHARBUF.  CHARBUF is an array:
6913      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6914  */
6915
6916 static INLINE void
6917 produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6918 {
6919   int len;
6920   EMACS_INT to;
6921   enum composition_method method;
6922   Lisp_Object components;
6923
6924   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6925   to = pos + charbuf[2];
6926   method = (enum composition_method) (charbuf[4]);
6927
6928   if (method == COMPOSITION_RELATIVE)
6929     components = Qnil;
6930   else
6931     {
6932       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6933       int i, j;
6934
6935       if (method == COMPOSITION_WITH_RULE)
6936         len = charbuf[2] * 3 - 2;
6937       charbuf += MAX_ANNOTATION_LENGTH;
6938       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6939       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6940         {
6941           if (charbuf[i] >= 0)
6942             args[j] = make_number (charbuf[i]);
6943           else
6944             {
6945               i++;
6946               args[j] = make_number (charbuf[i] % 0x100);
6947             }
6948         }
6949       components = (i == j ? Fstring (j, args) : Fvector (j, args));
6950     }
6951   compose_text (pos, to, components, Qnil, coding->dst_object);
6952 }
6953
6954
6955 /* Put `charset' property on text in CODING->object according to
6956    the annotation data at CHARBUF.  CHARBUF is an array:
6957      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6958  */
6959
6960 static INLINE void
6961 produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6962 {
6963   EMACS_INT from = pos - charbuf[2];
6964   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6965
6966   Fput_text_property (make_number (from), make_number (pos),
6967                       Qcharset, CHARSET_NAME (charset),
6968                       coding->dst_object);
6969 }
6970
6971
/* Number of `int' elements first requested for the conversion work
   buffer CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate CODING->charbuf with alloca and record the number of
   elements in CODING->charbuf_size.  The request starts at
   CHARBUF_SIZE elements and is halved after each attempt that yields
   a null pointer, for as long as the size is above 1024.

   If no buffer could be obtained, CODING_RESULT_INSUFFICIENT_MEM is
   recorded and the macro executes `return coding->result;', so it
   may only be used inside a function that returns that value.  As the
   memory comes from alloca, the macro must be expanded directly in
   the function that uses the buffer.

   NOTE(review): alloca normally does not report failure by returning
   a null pointer, so the retry loop and the error path are presumably
   never taken -- confirm.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    coding->charbuf = NULL;                                             \
    while (size > 1024)                                                 \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * size);         \
        if (coding->charbuf)                                            \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = size;                                        \
  } while (0)
6993
6994
6995 static void
6996 produce_annotation (struct coding_system *coding, EMACS_INT pos)
6997 {
6998   int *charbuf = coding->charbuf;
6999   int *charbuf_end = charbuf + coding->charbuf_used;
7000
7001   if (NILP (coding->dst_object))
7002     return;
7003
7004   while (charbuf < charbuf_end)
7005     {
7006       if (*charbuf >= 0)
7007         pos++, charbuf++;
7008       else
7009         {
7010           int len = -*charbuf;
7011
7012           if (len > 2)
7013             switch (charbuf[1])
7014               {
7015               case CODING_ANNOTATE_COMPOSITION_MASK:
7016                 produce_composition (coding, charbuf, pos);
7017                 break;
7018               case CODING_ANNOTATE_CHARSET_MASK:
7019                 produce_charset (coding, charbuf, pos);
7020                 break;
7021               }
7022           charbuf += len;
7023         }
7024     }
7025 }
7026
7027 /* Decode the data at CODING->src_object into CODING->dst_object.
7028    CODING->src_object is a buffer, a string, or nil.
7029    CODING->dst_object is a buffer.
7030
7031    If CODING->src_object is a buffer, it must be the current buffer.
7032    In this case, if CODING->src_pos is positive, it is a position of
7033    the source text in the buffer, otherwise, the source text is in the
7034    gap area of the buffer, and CODING->src_pos specifies the offset of
7035    the text from GPT (which must be the same as PT).  If this is the
7036    same buffer as CODING->dst_object, CODING->src_pos must be
7037    negative.
7038
7039    If CODING->src_object is a string, CODING->src_pos is an index to
7040    that string.
7041
7042    If CODING->src_object is nil, CODING->source must already point to
7043    the non-relocatable memory area.  In this case, CODING->src_pos is
7044    an offset from CODING->source.
7045
7046    The decoded data is inserted at the current point of the buffer
7047    CODING->dst_object.
7048 */
7049
7050 static int
7051 decode_coding (struct coding_system *coding)
7052 {
7053   Lisp_Object attrs;
7054   Lisp_Object undo_list;
7055   Lisp_Object translation_table;
7056   struct ccl_spec cclspec;
7057   int carryover;
7058   int i;
7059
7060   if (BUFFERP (coding->src_object)
7061       && coding->src_pos > 0
7062       && coding->src_pos < GPT
7063       && coding->src_pos + coding->src_chars > GPT)
7064     move_gap_both (coding->src_pos, coding->src_pos_byte);
7065
7066   undo_list = Qt;
7067   if (BUFFERP (coding->dst_object))
7068     {
7069       if (current_buffer != XBUFFER (coding->dst_object))
7070         set_buffer_internal (XBUFFER (coding->dst_object));
7071       if (GPT != PT)
7072         move_gap_both (PT, PT_BYTE);
7073       undo_list = current_buffer->undo_list;
7074       current_buffer->undo_list = Qt;
7075     }
7076
7077   coding->consumed = coding->consumed_char = 0;
7078   coding->produced = coding->produced_char = 0;
7079   coding->chars_at_source = 0;
7080   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7081   coding->errors = 0;
7082
7083   ALLOC_CONVERSION_WORK_AREA (coding);
7084
7085   attrs = CODING_ID_ATTRS (coding->id);
7086   translation_table = get_translation_table (attrs, 0, NULL);
7087
7088   carryover = 0;
7089   if (coding->decoder == decode_coding_ccl)
7090     {
7091       coding->spec.ccl = &cclspec;
7092       setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7093     }
7094   do
7095     {
7096       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7097
7098       coding_set_source (coding);
7099       coding->annotated = 0;
7100       coding->charbuf_used = carryover;
7101       (*(coding->decoder)) (coding);
7102       coding_set_destination (coding);
7103       carryover = produce_chars (coding, translation_table, 0);
7104       if (coding->annotated)
7105         produce_annotation (coding, pos);
7106       for (i = 0; i < carryover; i++)
7107         coding->charbuf[i]
7108           = coding->charbuf[coding->charbuf_used - carryover + i];
7109     }
7110   while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7111          || (coding->consumed < coding->src_bytes
7112              && (coding->result == CODING_RESULT_SUCCESS
7113                  || coding->result == CODING_RESULT_INVALID_SRC)));
7114
7115   if (carryover > 0)
7116     {
7117       coding_set_destination (coding);
7118       coding->charbuf_used = carryover;
7119       produce_chars (coding, translation_table, 1);
7120     }
7121
7122   coding->carryover_bytes = 0;
7123   if (coding->consumed < coding->src_bytes)
7124     {
7125       int nbytes = coding->src_bytes - coding->consumed;
7126       const unsigned char *src;
7127
7128       coding_set_source (coding);
7129       coding_set_destination (coding);
7130       src = coding->source + coding->consumed;
7131
7132       if (coding->mode & CODING_MODE_LAST_BLOCK)
7133         {
7134           /* Flush out unprocessed data as binary chars.  We are sure
7135              that the number of data is less than the size of
7136              coding->charbuf.  */
7137           coding->charbuf_used = 0;
7138           coding->chars_at_source = 0;
7139
7140           while (nbytes-- > 0)
7141             {
7142               int c = *src++;
7143
7144               if (c & 0x80)
7145                 c = BYTE8_TO_CHAR (c);
7146               coding->charbuf[coding->charbuf_used++] = c;
7147             }
7148           produce_chars (coding, Qnil, 1);
7149         }
7150       else
7151         {
7152           /* Record unprocessed bytes in coding->carryover.  We are
7153              sure that the number of data is less than the size of
7154              coding->carryover.  */
7155           unsigned char *p = coding->carryover;
7156
7157           if (nbytes > sizeof coding->carryover)
7158             nbytes = sizeof coding->carryover;
7159           coding->carryover_bytes = nbytes;
7160           while (nbytes-- > 0)
7161             *p++ = *src++;
7162         }
7163       coding->consumed = coding->src_bytes;
7164     }
7165
7166   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7167       && !inhibit_eol_conversion)
7168     decode_eol (coding);
7169   if (BUFFERP (coding->dst_object))
7170     {
7171       current_buffer->undo_list = undo_list;
7172       record_insert (coding->dst_pos, coding->produced_char);
7173     }
7174   return coding->result;
7175 }
7176
7177
7178 /* Extract an annotation datum from a composition starting at POS and
7179    ending before LIMIT of CODING->src_object (buffer or string), store
7180    the data in BUF, set *STOP to a starting position of the next
7181    composition (if any) or to LIMIT, and return the address of the
7182    next element of BUF.
7183
7184    If such an annotation is not found, set *STOP to a starting
7185    position of a composition after POS (if any) or to LIMIT, and
7186    return BUF.  */
7187
7188 static INLINE int *
7189 handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
7190                                struct coding_system *coding, int *buf,
7191                                EMACS_INT *stop)
7192 {
7193   EMACS_INT start, end;
7194   Lisp_Object prop;
7195
7196   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7197       || end > limit)
7198     *stop = limit;
7199   else if (start > pos)
7200     *stop = start;
7201   else
7202     {
7203       if (start == pos)
7204         {
7205           /* We found a composition.  Store the corresponding
7206              annotation data in BUF.  */
7207           int *head = buf;
7208           enum composition_method method = COMPOSITION_METHOD (prop);
7209           int nchars = COMPOSITION_LENGTH (prop);
7210
7211           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7212           if (method != COMPOSITION_RELATIVE)
7213             {
7214               Lisp_Object components;
7215               int len, i, i_byte;
7216
7217               components = COMPOSITION_COMPONENTS (prop);
7218               if (VECTORP (components))
7219                 {
7220                   len = XVECTOR (components)->size;
7221                   for (i = 0; i < len; i++)
7222                     *buf++ = XINT (AREF (components, i));
7223                 }
7224               else if (STRINGP (components))
7225                 {
7226                   len = SCHARS (components);
7227                   i = i_byte = 0;
7228                   while (i < len)
7229                     {
7230                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7231                       buf++;
7232                     }
7233                 }
7234               else if (INTEGERP (components))
7235                 {
7236                   len = 1;
7237                   *buf++ = XINT (components);
7238                 }
7239               else if (CONSP (components))
7240                 {
7241                   for (len = 0; CONSP (components);
7242                        len++, components = XCDR (components))
7243                     *buf++ = XINT (XCAR (components));
7244                 }
7245               else
7246                 abort ();
7247               *head -= len;
7248             }
7249         }
7250
7251       if (find_composition (end, limit, &start, &end, &prop,
7252                             coding->src_object)
7253           && end <= limit)
7254         *stop = start;
7255       else
7256         *stop = limit;
7257     }
7258   return buf;
7259 }
7260
7261
7262 /* Extract an annotation datum from a text property `charset' at POS of
7263    CODING->src_object (buffer of string), store the data in BUF, set
7264    *STOP to the position where the value of `charset' property changes
7265    (limiting by LIMIT), and return the address of the next element of
7266    BUF.
7267
7268    If the property value is nil, set *STOP to the position where the
7269    property value is non-nil (limiting by LIMIT), and return BUF.  */
7270
7271 static INLINE int *
7272 handle_charset_annotation (EMACS_INT pos, EMACS_INT limit,
7273                            struct coding_system *coding, int *buf,
7274                            EMACS_INT *stop)
7275 {
7276   Lisp_Object val, next;
7277   int id;
7278
7279   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7280   if (! NILP (val) && CHARSETP (val))
7281     id = XINT (CHARSET_SYMBOL_ID (val));
7282   else
7283     id = -1;
7284   ADD_CHARSET_DATA (buf, 0, id);
7285   next = Fnext_single_property_change (make_number (pos), Qcharset,
7286                                        coding->src_object,
7287                                        make_number (limit));
7288   *stop = XINT (next);
7289   return buf;
7290 }
7291
7292
7293 static void
7294 consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7295                int max_lookup)
7296 {
7297   int *buf = coding->charbuf;
7298   int *buf_end = coding->charbuf + coding->charbuf_size;
7299   const unsigned char *src = coding->source + coding->consumed;
7300   const unsigned char *src_end = coding->source + coding->src_bytes;
7301   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7302   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7303   int multibytep = coding->src_multibyte;
7304   Lisp_Object eol_type;
7305   int c;
7306   EMACS_INT stop, stop_composition, stop_charset;
7307   int *lookup_buf = NULL;
7308
7309   if (! NILP (translation_table))
7310     lookup_buf = alloca (sizeof (int) * max_lookup);
7311
7312   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7313   if (VECTORP (eol_type))
7314     eol_type = Qunix;
7315
7316   /* Note: composition handling is not yet implemented.  */
7317   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7318
7319   if (NILP (coding->src_object))
7320     stop = stop_composition = stop_charset = end_pos;
7321   else
7322     {
7323       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7324         stop = stop_composition = pos;
7325       else
7326         stop = stop_composition = end_pos;
7327       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7328         stop = stop_charset = pos;
7329       else
7330         stop_charset = end_pos;
7331     }
7332
7333   /* Compensate for CRLF and conversion.  */
7334   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7335   while (buf < buf_end)
7336     {
7337       Lisp_Object trans;
7338
7339       if (pos == stop)
7340         {
7341           if (pos == end_pos)
7342             break;
7343           if (pos == stop_composition)
7344             buf = handle_composition_annotation (pos, end_pos, coding,
7345                                                  buf, &stop_composition);
7346           if (pos == stop_charset)
7347             buf = handle_charset_annotation (pos, end_pos, coding,
7348                                              buf, &stop_charset);
7349           stop = (stop_composition < stop_charset
7350                   ? stop_composition : stop_charset);
7351         }
7352
7353       if (! multibytep)
7354         {
7355           EMACS_INT bytes;
7356
7357           if (coding->encoder == encode_coding_raw_text
7358               || coding->encoder == encode_coding_ccl)
7359             c = *src++, pos++;
7360           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7361             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7362           else
7363             c = BYTE8_TO_CHAR (*src), src++, pos++;
7364         }
7365       else
7366         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7367       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7368         c = '\n';
7369       if (! EQ (eol_type, Qunix))
7370         {
7371           if (c == '\n')
7372             {
7373               if (EQ (eol_type, Qdos))
7374                 *buf++ = '\r';
7375               else
7376                 c = '\r';
7377             }
7378         }
7379
7380       trans = Qnil;
7381       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7382       if (NILP (trans))
7383         *buf++ = c;
7384       else
7385         {
7386           int from_nchars = 1, to_nchars = 1;
7387           int *lookup_buf_end;
7388           const unsigned char *p = src;
7389           int i;
7390
7391           lookup_buf[0] = c;
7392           for (i = 1; i < max_lookup && p < src_end; i++)
7393             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7394           lookup_buf_end = lookup_buf + i;
7395           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7396           if (INTEGERP (trans))
7397             c = XINT (trans);
7398           else if (CONSP (trans))
7399             {
7400               from_nchars = ASIZE (XCAR (trans));
7401               trans = XCDR (trans);
7402               if (INTEGERP (trans))
7403                 c = XINT (trans);
7404               else
7405                 {
7406                   to_nchars = ASIZE (trans);
7407                   if (buf + to_nchars > buf_end)
7408                     break;
7409                   c = XINT (AREF (trans, 0));
7410                 }
7411             }
7412           else
7413             break;
7414           *buf++ = c;
7415           for (i = 1; i < to_nchars; i++)
7416             *buf++ = XINT (AREF (trans, i));
7417           for (i = 1; i < from_nchars; i++, pos++)
7418             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7419         }
7420     }
7421
7422   coding->consumed = src - coding->source;
7423   coding->consumed_char = pos - coding->src_pos;
7424   coding->charbuf_used = buf - coding->charbuf;
7425   coding->chars_at_source = 0;
7426 }
7427
7428
7429 /* Encode the text at CODING->src_object into CODING->dst_object.
7430    CODING->src_object is a buffer or a string.
7431    CODING->dst_object is a buffer or nil.
7432
7433    If CODING->src_object is a buffer, it must be the current buffer.
7434    In this case, if CODING->src_pos is positive, it is a position of
7435    the source text in the buffer, otherwise. the source text is in the
7436    gap area of the buffer, and coding->src_pos specifies the offset of
7437    the text from GPT (which must be the same as PT).  If this is the
7438    same buffer as CODING->dst_object, CODING->src_pos must be
7439    negative and CODING should not have `pre-write-conversion'.
7440
7441    If CODING->src_object is a string, CODING should not have
7442    `pre-write-conversion'.
7443
7444    If CODING->dst_object is a buffer, the encoded data is inserted at
7445    the current point of that buffer.
7446
7447    If CODING->dst_object is nil, the encoded data is placed at the
7448    memory area specified by CODING->destination.  */
7449
7450 static int
7451 encode_coding (struct coding_system *coding)
7452 {
7453   Lisp_Object attrs;
7454   Lisp_Object translation_table;
7455   int max_lookup;
7456   struct ccl_spec cclspec;
7457
7458   attrs = CODING_ID_ATTRS (coding->id);
7459   if (coding->encoder == encode_coding_raw_text)
7460     translation_table = Qnil, max_lookup = 0;
7461   else
7462     translation_table = get_translation_table (attrs, 1, &max_lookup);
7463
7464   if (BUFFERP (coding->dst_object))
7465     {
7466       set_buffer_internal (XBUFFER (coding->dst_object));
7467       coding->dst_multibyte
7468         = ! NILP (current_buffer->enable_multibyte_characters);
7469     }
7470
7471   coding->consumed = coding->consumed_char = 0;
7472   coding->produced = coding->produced_char = 0;
7473   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7474   coding->errors = 0;
7475
7476   ALLOC_CONVERSION_WORK_AREA (coding);
7477
7478   if (coding->encoder == encode_coding_ccl)
7479     {
7480       coding->spec.ccl = &cclspec;
7481       setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7482     }
7483   do {
7484     coding_set_source (coding);
7485     consume_chars (coding, translation_table, max_lookup);
7486     coding_set_destination (coding);
7487     (*(coding->encoder)) (coding);
7488   } while (coding->consumed_char < coding->src_chars);
7489
7490   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7491     insert_from_gap (coding->produced_char, coding->produced);
7492
7493   return (coding->result);
7494 }
7495
7496
/* Name (or base name) of work buffer for code conversion.  */
static Lisp_Object Vcode_conversion_workbuf_name;

/* A working buffer used by the top level conversion.  It has the
   name Vcode_conversion_workbuf_name and is kept for reuse rather
   than killed after each conversion; make_conversion_work_buffer
   creates it again if it is found not to be live.  The other working
   buffers are destroyed after the use is finished, and their names
   are modified versions of Vcode_conversion_workbuf_name.  */
static Lisp_Object Vcode_conversion_reused_workbuf;

/* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
   make_conversion_work_buffer increments it on every call, and
   code_conversion_restore resets it to 0 when the reused buffer is
   released.  */
static int reused_workbuf_in_use;
7509
7510
7511 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7512    multibyteness of returning buffer.  */
7513
7514 static Lisp_Object
7515 make_conversion_work_buffer (int multibyte)
7516 {
7517   Lisp_Object name, workbuf;
7518   struct buffer *current;
7519
7520   if (reused_workbuf_in_use++)
7521     {
7522       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7523       workbuf = Fget_buffer_create (name);
7524     }
7525   else
7526     {
7527       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7528         Vcode_conversion_reused_workbuf
7529           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7530       workbuf = Vcode_conversion_reused_workbuf;
7531     }
7532   current = current_buffer;
7533   set_buffer_internal (XBUFFER (workbuf));
7534   /* We can't allow modification hooks to run in the work buffer.  For
7535      instance, directory_files_internal assumes that file decoding
7536      doesn't compile new regexps.  */
7537   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7538   Ferase_buffer ();
7539   current_buffer->undo_list = Qt;
7540   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7541   set_buffer_internal (current);
7542   return workbuf;
7543 }
7544
7545
7546 static Lisp_Object
7547 code_conversion_restore (Lisp_Object arg)
7548 {
7549   Lisp_Object current, workbuf;
7550   struct gcpro gcpro1;
7551
7552   GCPRO1 (arg);
7553   current = XCAR (arg);
7554   workbuf = XCDR (arg);
7555   if (! NILP (workbuf))
7556     {
7557       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7558         reused_workbuf_in_use = 0;
7559       else if (! NILP (Fbuffer_live_p (workbuf)))
7560         Fkill_buffer (workbuf);
7561     }
7562   set_buffer_internal (XBUFFER (current));
7563   UNGCPRO;
7564   return Qnil;
7565 }
7566
7567 Lisp_Object
7568 code_conversion_save (int with_work_buf, int multibyte)
7569 {
7570   Lisp_Object workbuf = Qnil;
7571
7572   if (with_work_buf)
7573     workbuf = make_conversion_work_buffer (multibyte);
7574   record_unwind_protect (code_conversion_restore,
7575                          Fcons (Fcurrent_buffer (), workbuf));
7576   return workbuf;
7577 }
7578
7579 int
7580 decode_coding_gap (struct coding_system *coding,
7581                    EMACS_INT chars, EMACS_INT bytes)
7582 {
7583   int count = SPECPDL_INDEX ();
7584   Lisp_Object attrs;
7585
7586   code_conversion_save (0, 0);
7587
7588   coding->src_object = Fcurrent_buffer ();
7589   coding->src_chars = chars;
7590   coding->src_bytes = bytes;
7591   coding->src_pos = -chars;
7592   coding->src_pos_byte = -bytes;
7593   coding->src_multibyte = chars < bytes;
7594   coding->dst_object = coding->src_object;
7595   coding->dst_pos = PT;
7596   coding->dst_pos_byte = PT_BYTE;
7597   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7598
7599   if (CODING_REQUIRE_DETECTION (coding))
7600     detect_coding (coding);
7601
7602   coding->mode |= CODING_MODE_LAST_BLOCK;
7603   current_buffer->text->inhibit_shrinking = 1;
7604   decode_coding (coding);
7605   current_buffer->text->inhibit_shrinking = 0;
7606
7607   attrs = CODING_ID_ATTRS (coding->id);
7608   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7609     {
7610       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7611       Lisp_Object val;
7612
7613       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7614       val = call1 (CODING_ATTR_POST_READ (attrs),
7615                    make_number (coding->produced_char));
7616       CHECK_NATNUM (val);
7617       coding->produced_char += Z - prev_Z;
7618       coding->produced += Z_BYTE - prev_Z_BYTE;
7619     }
7620
7621   unbind_to (count, Qnil);
7622   return coding->result;
7623 }
7624
7625 int
7626 encode_coding_gap (struct coding_system *coding,
7627                    EMACS_INT chars, EMACS_INT bytes)
7628 {
7629   int count = SPECPDL_INDEX ();
7630
7631   code_conversion_save (0, 0);
7632
7633   coding->src_object = Fcurrent_buffer ();
7634   coding->src_chars = chars;
7635   coding->src_bytes = bytes;
7636   coding->src_pos = -chars;
7637   coding->src_pos_byte = -bytes;
7638   coding->src_multibyte = chars < bytes;
7639   coding->dst_object = coding->src_object;
7640   coding->dst_pos = PT;
7641   coding->dst_pos_byte = PT_BYTE;
7642
7643   encode_coding (coding);
7644
7645   unbind_to (count, Qnil);
7646   return coding->result;
7647 }
7648
7649
7650 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7651    SRC_OBJECT into DST_OBJECT by coding context CODING.
7652
7653    SRC_OBJECT is a buffer, a string, or Qnil.
7654
7655    If it is a buffer, the text is at point of the buffer.  FROM and TO
7656    are positions in the buffer.
7657
7658    If it is a string, the text is at the beginning of the string.
7659    FROM and TO are indices to the string.
7660
7661    If it is nil, the text is at coding->source.  FROM and TO are
7662    indices to coding->source.
7663
7664    DST_OBJECT is a buffer, Qt, or Qnil.
7665
7666    If it is a buffer, the decoded text is inserted at point of the
7667    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7668    is deleted.
7669
7670    If it is Qt, a string is made from the decoded text, and
7671    set in CODING->dst_object.
7672
7673    If it is Qnil, the decoded text is stored at CODING->destination.
7674    The caller must allocate CODING->dst_bytes bytes at
7675    CODING->destination by xmalloc.  If the decoded text is longer than
7676    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7677  */
7678
7679 void
7680 decode_coding_object (struct coding_system *coding,
7681                       Lisp_Object src_object,
7682                       EMACS_INT from, EMACS_INT from_byte,
7683                       EMACS_INT to, EMACS_INT to_byte,
7684                       Lisp_Object dst_object)
7685 {
7686   int count = SPECPDL_INDEX ();
7687   unsigned char *destination;
7688   EMACS_INT dst_bytes;
7689   EMACS_INT chars = to - from;
7690   EMACS_INT bytes = to_byte - from_byte;
7691   Lisp_Object attrs;
7692   int saved_pt = -1, saved_pt_byte;
7693   int need_marker_adjustment = 0;
7694   Lisp_Object old_deactivate_mark;
7695
7696   old_deactivate_mark = Vdeactivate_mark;
7697
7698   if (NILP (dst_object))
7699     {
7700       destination = coding->destination;
7701       dst_bytes = coding->dst_bytes;
7702     }
7703
7704   coding->src_object = src_object;
7705   coding->src_chars = chars;
7706   coding->src_bytes = bytes;
7707   coding->src_multibyte = chars < bytes;
7708
7709   if (STRINGP (src_object))
7710     {
7711       coding->src_pos = from;
7712       coding->src_pos_byte = from_byte;
7713     }
7714   else if (BUFFERP (src_object))
7715     {
7716       set_buffer_internal (XBUFFER (src_object));
7717       if (from != GPT)
7718         move_gap_both (from, from_byte);
7719       if (EQ (src_object, dst_object))
7720         {
7721           struct Lisp_Marker *tail;
7722
7723           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7724             {
7725               tail->need_adjustment
7726                 = tail->charpos == (tail->insertion_type ? from : to);
7727               need_marker_adjustment |= tail->need_adjustment;
7728             }
7729           saved_pt = PT, saved_pt_byte = PT_BYTE;
7730           TEMP_SET_PT_BOTH (from, from_byte);
7731           current_buffer->text->inhibit_shrinking = 1;
7732           del_range_both (from, from_byte, to, to_byte, 1);
7733           coding->src_pos = -chars;
7734           coding->src_pos_byte = -bytes;
7735         }
7736       else
7737         {
7738           coding->src_pos = from;
7739           coding->src_pos_byte = from_byte;
7740         }
7741     }
7742
7743   if (CODING_REQUIRE_DETECTION (coding))
7744     detect_coding (coding);
7745   attrs = CODING_ID_ATTRS (coding->id);
7746
7747   if (EQ (dst_object, Qt)
7748       || (! NILP (CODING_ATTR_POST_READ (attrs))
7749           && NILP (dst_object)))
7750     {
7751       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7752       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7753       coding->dst_pos = BEG;
7754       coding->dst_pos_byte = BEG_BYTE;
7755     }
7756   else if (BUFFERP (dst_object))
7757     {
7758       code_conversion_save (0, 0);
7759       coding->dst_object = dst_object;
7760       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7761       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7762       coding->dst_multibyte
7763         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7764     }
7765   else
7766     {
7767       code_conversion_save (0, 0);
7768       coding->dst_object = Qnil;
7769       /* Most callers presume this will return a multibyte result, and they
7770          won't use `binary' or `raw-text' anyway, so let's not worry about
7771          CODING_FOR_UNIBYTE.  */
7772       coding->dst_multibyte = 1;
7773     }
7774
7775   decode_coding (coding);
7776
7777   if (BUFFERP (coding->dst_object))
7778     set_buffer_internal (XBUFFER (coding->dst_object));
7779
7780   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7781     {
7782       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7783       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7784       Lisp_Object val;
7785
7786       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7787       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7788               old_deactivate_mark);
7789       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7790                         make_number (coding->produced_char));
7791       UNGCPRO;
7792       CHECK_NATNUM (val);
7793       coding->produced_char += Z - prev_Z;
7794       coding->produced += Z_BYTE - prev_Z_BYTE;
7795     }
7796
7797   if (EQ (dst_object, Qt))
7798     {
7799       coding->dst_object = Fbuffer_string ();
7800     }
7801   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7802     {
7803       set_buffer_internal (XBUFFER (coding->dst_object));
7804       if (dst_bytes < coding->produced)
7805         {
7806           destination = xrealloc (destination, coding->produced);
7807           if (! destination)
7808             {
7809               record_conversion_result (coding,
7810                                         CODING_RESULT_INSUFFICIENT_MEM);
7811               unbind_to (count, Qnil);
7812               return;
7813             }
7814           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7815             move_gap_both (BEGV, BEGV_BYTE);
7816           memcpy (destination, BEGV_ADDR, coding->produced);
7817           coding->destination = destination;
7818         }
7819     }
7820
7821   if (saved_pt >= 0)
7822     {
7823       /* This is the case of:
7824          (BUFFERP (src_object) && EQ (src_object, dst_object))
7825          As we have moved PT while replacing the original buffer
7826          contents, we must recover it now.  */
7827       set_buffer_internal (XBUFFER (src_object));
7828       current_buffer->text->inhibit_shrinking = 0;
7829       if (saved_pt < from)
7830         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7831       else if (saved_pt < from + chars)
7832         TEMP_SET_PT_BOTH (from, from_byte);
7833       else if (! NILP (current_buffer->enable_multibyte_characters))
7834         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7835                           saved_pt_byte + (coding->produced - bytes));
7836       else
7837         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7838                           saved_pt_byte + (coding->produced - bytes));
7839
7840       if (need_marker_adjustment)
7841         {
7842           struct Lisp_Marker *tail;
7843
7844           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7845             if (tail->need_adjustment)
7846               {
7847                 tail->need_adjustment = 0;
7848                 if (tail->insertion_type)
7849                   {
7850                     tail->bytepos = from_byte;
7851                     tail->charpos = from;
7852                   }
7853                 else
7854                   {
7855                     tail->bytepos = from_byte + coding->produced;
7856                     tail->charpos
7857                       = (NILP (current_buffer->enable_multibyte_characters)
7858                          ? tail->bytepos : from + coding->produced_char);
7859                   }
7860               }
7861         }
7862     }
7863
7864   Vdeactivate_mark = old_deactivate_mark;
7865   unbind_to (count, coding->dst_object);
7866 }
7867
7868
7869 void
7870 encode_coding_object (struct coding_system *coding,
7871                       Lisp_Object src_object,
7872                       EMACS_INT from, EMACS_INT from_byte,
7873                       EMACS_INT to, EMACS_INT to_byte,
7874                       Lisp_Object dst_object)
7875 {
7876   int count = SPECPDL_INDEX ();
7877   EMACS_INT chars = to - from;
7878   EMACS_INT bytes = to_byte - from_byte;
7879   Lisp_Object attrs;
7880   int saved_pt = -1, saved_pt_byte;
7881   int need_marker_adjustment = 0;
7882   int kill_src_buffer = 0;
7883   Lisp_Object old_deactivate_mark;
7884
7885   old_deactivate_mark = Vdeactivate_mark;
7886
7887   coding->src_object = src_object;
7888   coding->src_chars = chars;
7889   coding->src_bytes = bytes;
7890   coding->src_multibyte = chars < bytes;
7891
7892   attrs = CODING_ID_ATTRS (coding->id);
7893
7894   if (EQ (src_object, dst_object))
7895     {
7896       struct Lisp_Marker *tail;
7897
7898       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7899         {
7900           tail->need_adjustment
7901             = tail->charpos == (tail->insertion_type ? from : to);
7902           need_marker_adjustment |= tail->need_adjustment;
7903         }
7904     }
7905
7906   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7907     {
7908       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7909       set_buffer_internal (XBUFFER (coding->src_object));
7910       if (STRINGP (src_object))
7911         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7912       else if (BUFFERP (src_object))
7913         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7914       else
7915         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7916
7917       if (EQ (src_object, dst_object))
7918         {
7919           set_buffer_internal (XBUFFER (src_object));
7920           saved_pt = PT, saved_pt_byte = PT_BYTE;
7921           del_range_both (from, from_byte, to, to_byte, 1);
7922           set_buffer_internal (XBUFFER (coding->src_object));
7923         }
7924
7925       {
7926         Lisp_Object args[3];
7927         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7928
7929         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7930                 old_deactivate_mark);
7931         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7932         args[1] = make_number (BEG);
7933         args[2] = make_number (Z);
7934         safe_call (3, args);
7935         UNGCPRO;
7936       }
7937       if (XBUFFER (coding->src_object) != current_buffer)
7938         kill_src_buffer = 1;
7939       coding->src_object = Fcurrent_buffer ();
7940       if (BEG != GPT)
7941         move_gap_both (BEG, BEG_BYTE);
7942       coding->src_chars = Z - BEG;
7943       coding->src_bytes = Z_BYTE - BEG_BYTE;
7944       coding->src_pos = BEG;
7945       coding->src_pos_byte = BEG_BYTE;
7946       coding->src_multibyte = Z < Z_BYTE;
7947     }
7948   else if (STRINGP (src_object))
7949     {
7950       code_conversion_save (0, 0);
7951       coding->src_pos = from;
7952       coding->src_pos_byte = from_byte;
7953     }
7954   else if (BUFFERP (src_object))
7955     {
7956       code_conversion_save (0, 0);
7957       set_buffer_internal (XBUFFER (src_object));
7958       if (EQ (src_object, dst_object))
7959         {
7960           saved_pt = PT, saved_pt_byte = PT_BYTE;
7961           coding->src_object = del_range_1 (from, to, 1, 1);
7962           coding->src_pos = 0;
7963           coding->src_pos_byte = 0;
7964         }
7965       else
7966         {
7967           if (from < GPT && to >= GPT)
7968             move_gap_both (from, from_byte);
7969           coding->src_pos = from;
7970           coding->src_pos_byte = from_byte;
7971         }
7972     }
7973   else
7974     code_conversion_save (0, 0);
7975
7976   if (BUFFERP (dst_object))
7977     {
7978       coding->dst_object = dst_object;
7979       if (EQ (src_object, dst_object))
7980         {
7981           coding->dst_pos = from;
7982           coding->dst_pos_byte = from_byte;
7983         }
7984       else
7985         {
7986           struct buffer *current = current_buffer;
7987
7988           set_buffer_temp (XBUFFER (dst_object));
7989           coding->dst_pos = PT;
7990           coding->dst_pos_byte = PT_BYTE;
7991           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7992           set_buffer_temp (current);
7993         }
7994       coding->dst_multibyte
7995         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7996     }
7997   else if (EQ (dst_object, Qt))
7998     {
7999       coding->dst_object = Qnil;
8000       coding->dst_bytes = coding->src_chars;
8001       if (coding->dst_bytes == 0)
8002         coding->dst_bytes = 1;
8003       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
8004       coding->dst_multibyte = 0;
8005     }
8006   else
8007     {
8008       coding->dst_object = Qnil;
8009       coding->dst_multibyte = 0;
8010     }
8011
8012   encode_coding (coding);
8013
8014   if (EQ (dst_object, Qt))
8015     {
8016       if (BUFFERP (coding->dst_object))
8017         coding->dst_object = Fbuffer_string ();
8018       else
8019         {
8020           coding->dst_object
8021             = make_unibyte_string ((char *) coding->destination,
8022                                    coding->produced);
8023           xfree (coding->destination);
8024         }
8025     }
8026
8027   if (saved_pt >= 0)
8028     {
8029       /* This is the case of:
8030          (BUFFERP (src_object) && EQ (src_object, dst_object))
8031          As we have moved PT while replacing the original buffer
8032          contents, we must recover it now.  */
8033       set_buffer_internal (XBUFFER (src_object));
8034       if (saved_pt < from)
8035         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8036       else if (saved_pt < from + chars)
8037         TEMP_SET_PT_BOTH (from, from_byte);
8038       else if (! NILP (current_buffer->enable_multibyte_characters))
8039         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8040                           saved_pt_byte + (coding->produced - bytes));
8041       else
8042         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8043                           saved_pt_byte + (coding->produced - bytes));
8044
8045       if (need_marker_adjustment)
8046         {
8047           struct Lisp_Marker *tail;
8048
8049           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8050             if (tail->need_adjustment)
8051               {
8052                 tail->need_adjustment = 0;
8053                 if (tail->insertion_type)
8054                   {
8055                     tail->bytepos = from_byte;
8056                     tail->charpos = from;
8057                   }
8058                 else
8059                   {
8060                     tail->bytepos = from_byte + coding->produced;
8061                     tail->charpos
8062                       = (NILP (current_buffer->enable_multibyte_characters)
8063                          ? tail->bytepos : from + coding->produced_char);
8064                   }
8065               }
8066         }
8067     }
8068
8069   if (kill_src_buffer)
8070     Fkill_buffer (coding->src_object);
8071
8072   Vdeactivate_mark = old_deactivate_mark;
8073   unbind_to (count, Qnil);
8074 }
8075
8076
8077 Lisp_Object
8078 preferred_coding_system (void)
8079 {
       /* Return the name of the coding system registered for the coding
          category stored in the first slot of coding_priorities.  */
8080   int id = coding_categories[coding_priorities[0]].id;
8081
8082   return CODING_ID_NAME (id);
8083 }
8084
8085 \f
8086 #ifdef emacs
8087 /*** 11. Emacs Lisp library functions ***/
8088
8089 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8090        doc: /* Return t if OBJECT is nil or a coding-system.
8091 See the documentation of `define-coding-system' for information
8092 about coding-system objects.  */)
8093   (Lisp_Object object)
8094 {
       /* nil, and anything for which CODING_SYSTEM_ID yields a
          non-negative ID, is accepted immediately.  */
8095   if (NILP (object)
8096       || CODING_SYSTEM_ID (object) >= 0)
8097     return Qt;
       /* Otherwise OBJECT still counts as a coding system when it is a
          symbol carrying a non-nil Qcoding_system_define_form property
          (the form that Fcheck_coding_system evaluates on demand).  */
8098   if (! SYMBOLP (object)
8099       || NILP (Fget (object, Qcoding_system_define_form)))
8100     return Qnil;
8101   return Qt;
8102 }
8103
8104 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8105        Sread_non_nil_coding_system, 1, 1, 0,
8106        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8107   (Lisp_Object prompt)
8108 {
8109   Lisp_Object val;
       /* Ask again until the user supplies a non-empty string.
          Completion candidates come from Vcoding_system_alist and the
          input history is kept in Qcoding_system_history.  */
8110   do
8111     {
8112       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8113                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8114     }
8115   while (SCHARS (val) == 0);
       /* The entered name is returned as an interned symbol.  */
8116   return (Fintern (val, Qnil));
8117 }
8118
8119 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8120        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8121 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8122 Ignores case when completing coding systems (all Emacs coding systems
8123 are lower-case).  */)
8124   (Lisp_Object prompt, Lisp_Object default_coding_system)
8125 {
8126   Lisp_Object val;
       /* Remember the specpdl depth so the binding made below can be
          undone by unbind_to.  */
8127   int count = SPECPDL_INDEX ();
8128
       /* A symbol default is handed to Fcompleting_read as its name
          string.  NOTE(review): if nil satisfies SYMBOLP here, an
          omitted default becomes the name of nil rather than nil
          itself -- confirm that this is intended.  */
8129   if (SYMBOLP (default_coding_system))
8130     default_coding_system = SYMBOL_NAME (default_coding_system);
       /* Bind Qcompletion_ignore_case to t only for the duration of the
          minibuffer read.  */
8131   specbind (Qcompletion_ignore_case, Qt);
8132   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8133                           Qt, Qnil, Qcoding_system_history,
8134                           default_coding_system, Qnil);
8135   unbind_to (count, Qnil);
       /* An empty result yields nil; anything else is interned.  */
8136   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8137 }
8138
8139 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8140        1, 1, 0,
8141        doc: /* Check validity of CODING-SYSTEM.
8142 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8143 It is valid if it is nil or a symbol defined as a coding system by the
8144 function `define-coding-system'.  */)
8145   (Lisp_Object coding_system)
8146 {
8147   Lisp_Object define_form;
8148
       /* If CODING_SYSTEM carries a pending definition form, evaluate it
          now.  The property is cleared before the evaluation, so the
          form is evaluated at most once (presumably this also guards
          against re-entry while the form runs -- TODO confirm).  */
8149   define_form = Fget (coding_system, Qcoding_system_define_form);
8150   if (! NILP (define_form))
8151     {
8152       Fput (coding_system, Qcoding_system_define_form, Qnil);
8153       safe_eval (define_form);
8154     }
       /* Accept whatever Fcoding_system_p accepts; otherwise signal
          Qcoding_system_error with CODING_SYSTEM as its data.  */
8155   if (!NILP (Fcoding_system_p (coding_system)))
8156     return coding_system;
8157   xsignal1 (Qcoding_system_error, coding_system);
8158 }
8159
8160 \f
8161 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8162    HIGHEST is nonzero, return the coding system of the highest
8163    priority among the detected coding systems.  Otherwise return a
8164    list of detected coding systems sorted by their priorities.  If
8165    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8166    multibyte form but contain only ASCII and eight-bit chars.
8167    Otherwise, the bytes are raw bytes.
8168
8169    CODING-SYSTEM controls the detection as below:
8170
8171    If it is nil, detect both text-format and eol-format.  If the
8172    text-format part of CODING-SYSTEM is already specified
8173    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8174    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8175    detect only text-format.  */
8176
8177 Lisp_Object
8178 detect_coding_system (const unsigned char *src,
8179                       EMACS_INT src_chars, EMACS_INT src_bytes,
8180                       int highest, int multibytep,
8181                       Lisp_Object coding_system)
8182 {
8183   const unsigned char *src_end = src + src_bytes;
8184   Lisp_Object attrs, eol_type;
8185   Lisp_Object val = Qnil;
8186   struct coding_system coding;
8187   int id;
8188   struct coding_detection_info detect_info;
8189   enum coding_category base_category;
8190   int null_byte_found = 0, eight_bit_found = 0;
8191
       /* A nil CODING_SYSTEM is treated as Qundecided.  */
8192   if (NILP (coding_system))
8193     coding_system = Qundecided;
8194   setup_coding_system (coding_system, &coding);
8195   attrs = CODING_ID_ATTRS (coding.id);
8196   eol_type = CODING_ID_EOL_TYPE (coding.id);
8197   coding_system = CODING_ATTR_BASE_NAME (attrs);
8198
       /* Describe the source bytes in CODING for the detector functions
          called below.  */
8199   coding.source = src;
8200   coding.src_chars = src_chars;
8201   coding.src_bytes = src_bytes;
8202   coding.src_multibyte = multibytep;
8203   coding.consumed = 0;
8204   coding.mode |= CODING_MODE_LAST_BLOCK;
8205   coding.head_ascii = 0;
8206
       /* The three fields are bit masks indexed by (1 << category);
          start with nothing checked, found or rejected.  */
8207   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8208
8209   /* At first, detect text-format if necessary.  */
8210   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8211   if (base_category == coding_category_undecided)
8212     {
8213       enum coding_category category;
8214       struct coding_system *this;
8215       int c, i;
8216
8217       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
8218       for (; src < src_end; src++)
8219         {
8220           c = *src;
8221           if (c & 0x80)
8222             {
                   /* An 8-bit byte.  The scan stops as soon as both an
                      8-bit byte and a null byte have been seen.  */
8223               eight_bit_found = 1;
8224               if (null_byte_found)
8225                 break;
8226             }
8227           else if (c < 0x20)
8228             {
                   /* ESC, SI and SO trigger the ISO-2022 detector, at
                      most once (detect_info.checked is zero only
                      before any detector has run).  */
8229               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8230                   && ! inhibit_iso_escape_detection
8231                   && ! detect_info.checked)
8232                 {
8233                   if (detect_coding_iso_2022 (&coding, &detect_info))
8234                     {
8235                       /* We have scanned the whole data.  */
8236                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8237                         {
8238                           /* We didn't find an 8-bit code.  We may
8239                              have found a null-byte, but it's very
8240                              rare that a binary file conforms to
8241                              ISO-2022.  */
8242                           src = src_end;
8243                           coding.head_ascii = src - coding.source;
8244                         }
                           /* Reject every category outside
                              CATEGORY_MASK_ISO_ESCAPE.  */
8245                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8246                       break;
8247                     }
8248                 }
8249               else if (! c && !inhibit_null_byte_detection)
8250                 {
8251                   null_byte_found = 1;
8252                   if (eight_bit_found)
8253                     break;
8254                 }
                   /* head_ascii counts the bytes preceding the first
                      8-bit byte.  */
8255               if (! eight_bit_found)
8256                 coding.head_ascii++;
8257             }
8258           else if (! eight_bit_found)
8259             coding.head_ascii++;
8260         }
8261
           /* Run the per-category detection only when the scan above saw
              something other than plain 7-bit text, stopped early, or
              the ISO-2022 detector already reported a match.  */
8262       if (null_byte_found || eight_bit_found
8263           || coding.head_ascii < coding.src_bytes
8264           || detect_info.found)
8265         {
8266           if (coding.head_ascii == coding.src_bytes)
8267             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
8268             for (i = 0; i < coding_category_raw_text; i++)
8269               {
8270                 category = coding_priorities[i];
8271                 this = coding_categories + category;
8272                 if (detect_info.found & (1 << category))
8273                   break;
8274               }
8275           else
8276             {
                   /* With a null byte present, mark every category except
                      the UTF-16 ones as already checked and rejected, so
                      only the UTF-16 detection can still run below.  */
8277               if (null_byte_found)
8278                 {
8279                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8280                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8281                 }
                   /* Walk the categories in the order given by
                      coding_priorities, calling the detector of each
                      category that has not been checked yet.  With
                      HIGHEST, stop at the first category that is
                      found.  */
8282               for (i = 0; i < coding_category_raw_text; i++)
8283                 {
8284                   category = coding_priorities[i];
8285                   this = coding_categories + category;
8286
8287                   if (this->id < 0)
8288                     {
8289                       /* No coding system of this category is defined.  */
8290                       detect_info.rejected |= (1 << category);
8291                     }
8292                   else if (category >= coding_category_raw_text)
8293                     continue;
8294                   else if (detect_info.checked & (1 << category))
8295                     {
8296                       if (highest
8297                           && (detect_info.found & (1 << category)))
8298                         break;
8299                     }
8300                   else if ((*(this->detector)) (&coding, &detect_info)
8301                            && highest
8302                            && (detect_info.found & (1 << category)))
8303                     {
                           /* Narrow utf-16-auto down to the byte order
                              that the detector reported.  Note that THIS
                              is left pointing at the utf-16-auto entry.  */
8304                       if (category == coding_category_utf_16_auto)
8305                         {
8306                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8307                             category = coding_category_utf_16_le;
8308                           else
8309                             category = coding_category_utf_16_be;
8310                         }
8311                       break;
8312                     }
8313                 }
8314             }
8315         }
8316
           /* Build VAL as a list of coding system IDs; the eol step at
              the end of this function replaces each ID by a name.  */
8317       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8318           || null_byte_found)
8319         {
               /* Everything was rejected, or a null byte was seen: report
                  Qno_conversion only.  */
8320           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8321           id = CODING_SYSTEM_ID (Qno_conversion);
8322           val = Fcons (make_number (id), Qnil);
8323         }
8324       else if (! detect_info.rejected && ! detect_info.found)
8325         {
               /* Nothing was found and nothing was rejected: report the
                  coding system of the `undecided' category.  */
8326           detect_info.found = CATEGORY_MASK_ANY;
8327           id = coding_categories[coding_category_undecided].id;
8328           val = Fcons (make_number (id), Qnil);
8329         }
8330       else if (highest)
8331         {
               /* CATEGORY and THIS hold whatever the detection loops above
                  left in them.  */
8332           if (detect_info.found)
8333             {
8334               detect_info.found = 1 << category;
8335               val = Fcons (make_number (this->id), Qnil);
8336             }
8337           else
                 /* Nothing was positively found: take the first category
                    in priority order that was not rejected.  */
8338             for (i = 0; i < coding_category_raw_text; i++)
8339               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8340                 {
8341                   detect_info.found = 1 << coding_priorities[i];
8342                   id = coding_categories[coding_priorities[i]].id;
8343                   val = Fcons (make_number (id), Qnil);
8344                   break;
8345                 }
8346         }
8347       else
8348         {
               /* List every candidate.  Both loops run from the last
                  priority slot down to slot 0 and cons onto the front of
                  VAL, so the found categories end up first, followed by
                  the categories that were neither found nor rejected,
                  each group in priority order.  */
8349           int mask = detect_info.rejected | detect_info.found;
8350           int found = 0;
8351
8352           for (i = coding_category_raw_text - 1; i >= 0; i--)
8353             {
8354               category = coding_priorities[i];
8355               if (! (mask & (1 << category)))
8356                 {
8357                   found |= 1 << category;
8358                   id = coding_categories[category].id;
8359                   if (id >= 0)
8360                     val = Fcons (make_number (id), val);
8361                 }
8362             }
8363           for (i = coding_category_raw_text - 1; i >= 0; i--)
8364             {
8365               category = coding_priorities[i];
8366               if (detect_info.found & (1 << category))
8367                 {
8368                   id = coding_categories[category].id;
8369                   val = Fcons (make_number (id), val);
8370                 }
8371             }
8372           detect_info.found |= found;
8373         }
8374     }
8375   else if (base_category == coding_category_utf_8_auto)
8376     {
           /* Only the choice between the signature and no-signature
              UTF-8 variants has to be made.  VAL stays nil when the
              detector returns zero.  */
8377       if (detect_coding_utf_8 (&coding, &detect_info))
8378         {
8379           struct coding_system *this;
8380
8381           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8382             this = coding_categories + coding_category_utf_8_sig;
8383           else
8384             this = coding_categories + coding_category_utf_8_nosig;
8385           val = Fcons (make_number (this->id), Qnil);
8386         }
8387     }
8388   else if (base_category == coding_category_utf_16_auto)
8389     {
           /* Only the UTF-16 variant has to be chosen.  VAL stays nil
              when the detector returns zero.  */
8390       if (detect_coding_utf_16 (&coding, &detect_info))
8391         {
8392           struct coding_system *this;
8393
8394           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8395             this = coding_categories + coding_category_utf_16_le;
8396           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8397             this = coding_categories + coding_category_utf_16_be;
8398           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8399             this = coding_categories + coding_category_utf_16_be_nosig;
8400           else
8401             this = coding_categories + coding_category_utf_16_le_nosig;
8402           val = Fcons (make_number (this->id), Qnil);
8403         }
8404     }
8405   else
8406     {
           /* The text format is already fixed by CODING_SYSTEM; report
              that coding system as is.  */
8407       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8408       val = Fcons (make_number (coding.id), Qnil);
8409     }
8410
8411   /* Then, detect eol-format if necessary.  */
8412   {
8413     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8414     Lisp_Object tail;
8415
         /* A vector EOL_TYPE means the eol format is still to be
            detected; its elements 0, 1 and 2 are what the loop below
            substitutes for the LF, CRLF and CR results.  */
8416     if (VECTORP (eol_type))
8417       {
8418         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8419           {
                 /* With a null byte present the eol scan is skipped and
                    LF is assumed.  */
8420             if (null_byte_found)
8421               normal_eol = EOL_SEEN_LF;
8422             else
8423               normal_eol = detect_eol (coding.source, src_bytes,
8424                                        coding_category_raw_text);
8425           }
8426         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8427                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8428           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8429                                       coding_category_utf_16_be);
8430         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8431                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8432           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8433                                       coding_category_utf_16_le);
8434       }
8435     else
8436       {
             /* The eol format is fixed by the argument: Qunix, Qdos, or
                anything else, which is taken to mean CR.  */
8437         if (EQ (eol_type, Qunix))
8438           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8439         else if (EQ (eol_type, Qdos))
8440           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8441         else
8442           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8443       }
8444
         /* Replace each coding system ID in VAL, in place, by a coding
            system name: the eol-specific element of that system's
            eol-type vector when an eol format was determined, otherwise
            the plain name of the system.  */
8445     for (tail = val; CONSP (tail); tail = XCDR (tail))
8446       {
8447         enum coding_category category;
8448         int this_eol;
8449
8450         id = XINT (XCAR (tail));
8451         attrs = CODING_ID_ATTRS (id);
8452         category = XINT (CODING_ATTR_CATEGORY (attrs));
8453         eol_type = CODING_ID_EOL_TYPE (id);
8454         if (VECTORP (eol_type))
8455           {
8456             if (category == coding_category_utf_16_be
8457                 || category == coding_category_utf_16_be_nosig)
8458               this_eol = utf_16_be_eol;
8459             else if (category == coding_category_utf_16_le
8460                      || category == coding_category_utf_16_le_nosig)
8461               this_eol = utf_16_le_eol;
8462             else
8463               this_eol = normal_eol;
8464
8465             if (this_eol == EOL_SEEN_LF)
8466               XSETCAR (tail, AREF (eol_type, 0));
8467             else if (this_eol == EOL_SEEN_CRLF)
8468               XSETCAR (tail, AREF (eol_type, 1));
8469             else if (this_eol == EOL_SEEN_CR)
8470               XSETCAR (tail, AREF (eol_type, 2));
8471             else
8472               XSETCAR (tail, CODING_ID_NAME (id));
8473           }
8474         else
8475           XSETCAR (tail, CODING_ID_NAME (id));
8476       }
8477   }
8478
       /* With HIGHEST, return the first element of VAL, or nil when VAL
          is empty; otherwise return the whole list.  */
8479   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8480 }
8481
8482
8483 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8484        2, 3, 0,
8485        doc: /* Detect coding system of the text in the region between START and END.
8486 Return a list of possible coding systems ordered by priority.
8487 The coding systems to try and their priorities follows what
8488 the function `coding-system-priority-list' (which see) returns.
8489
8490 If only ASCII characters are found (except for such ISO-2022 control
8491 characters as ESC), it returns a list of single element `undecided'
8492 or its subsidiary coding system according to a detected end-of-line
8493 format.
8494
8495 If optional argument HIGHEST is non-nil, return the coding system of
8496 highest priority.  */)
8497   (Lisp_Object start, Lisp_Object end, Lisp_Object highest)
8498 {
       /* Character and byte positions are kept as EMACS_INT, the type
          that detect_coding_system takes for its size arguments, so
          that they are not truncated where EMACS_INT is wider than
          int.  */
8499   EMACS_INT from, to;
8500   EMACS_INT from_byte, to_byte;
8501
8502   CHECK_NUMBER_COERCE_MARKER (start);
8503   CHECK_NUMBER_COERCE_MARKER (end);
8504
8505   validate_region (&start, &end);
8506   from = XINT (start), to = XINT (end);
8507   from_byte = CHAR_TO_BYTE (from);
8508   to_byte = CHAR_TO_BYTE (to);
8509
       /* The region is handed over as one pointer plus a length, so
          when the gap lies inside it (or at its end) the gap is moved
          to TO first.  */
8510   if (from < GPT && to >= GPT)
8511     move_gap_both (to, to_byte);
8512
       /* Detect with a nil coding system argument; the multibyte flag is
          taken from the current buffer.  */
8513   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8514                                to - from, to_byte - from_byte,
8515                                !NILP (highest),
8516                                !NILP (current_buffer
8517                                       ->enable_multibyte_characters),
8518                                Qnil);
8519 }
8520
8521 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8522        1, 2, 0,
8523        doc: /* Detect coding system of the text in STRING.
8524 Return a list of possible coding systems ordered by priority.
8525 The coding systems to try and their priorities follows what
8526 the function `coding-system-priority-list' (which see) returns.
8527
8528 If only ASCII characters are found (except for such ISO-2022 control
8529 characters as ESC), it returns a list of single element `undecided'
8530 or its subsidiary coding system according to a detected end-of-line
8531 format.
8532
8533 If optional argument HIGHEST is non-nil, return the coding system of
8534 highest priority.  */)
8535   (Lisp_Object string, Lisp_Object highest)
8536 {
8537   CHECK_STRING (string);
8538
       /* Run the detection over the whole of STRING with a nil coding
          system argument; the multibyte flag is that of STRING.  */
8539   return detect_coding_system (SDATA (string),
8540                                SCHARS (string), SBYTES (string),
8541                                !NILP (highest), STRING_MULTIBYTE (string),
8542                                Qnil);
8543 }
8544
8545
8546 static INLINE int
8547 char_encodable_p (int c, Lisp_Object attrs)
8548 {
       /* Return nonzero if character C, after being mapped through the
          translation table stored in ATTRS (when there is one), belongs
          to at least one charset in the charset list of ATTRS.  */
8549   Lisp_Object tail;
8550   struct charset *charset;
8551   Lisp_Object translation_table;
8552
8553   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8554   if (! NILP (translation_table))
8555     c = translate_char (translation_table, c);
       /* Stop at the first charset that contains C; TAIL is left non-nil
          exactly when such a charset was found.  */
8556   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8557        CONSP (tail); tail = XCDR (tail))
8558     {
8559       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8560       if (CHAR_CHARSET_P (c, charset))
8561         break;
8562     }
8563   return (! NILP (tail));
8564 }
8565
8566
8567 /* Return a list of coding systems that safely encode the text between
8568    START and END.  If EXCLUDE is non-nil, it is a list of coding
8569    systems not to check.  The returned list doesn't contain any such
8570    coding systems.  In any case, if the text contains only ASCII or is
8571    unibyte, return t.
     
        START may also be a string; in that case the whole string is
        examined and END is not used.  A returned list always ends with
        Qraw_text and Qno_conversion.  */
8572
8573 DEFUN ("find-coding-systems-region-internal",
8574        Ffind_coding_systems_region_internal,
8575        Sfind_coding_systems_region_internal, 2, 3, 0,
8576        doc: /* Internal use only.  */)
8577   (Lisp_Object start, Lisp_Object end, Lisp_Object exclude)
8578 {
8579   Lisp_Object coding_attrs_list, safe_codings;
8580   EMACS_INT start_byte, end_byte;
8581   const unsigned char *p, *pbeg, *pend;
8582   int c;
8583   Lisp_Object tail, elt, work_table;
8584
8585   if (STRINGP (start))
8586     {
           /* A unibyte string, or a multibyte string whose character and
              byte counts agree, has nothing that needs checking.  */
8587       if (!STRING_MULTIBYTE (start)
8588           || SCHARS (start) == SBYTES (start))
8589         return Qt;
8590       start_byte = 0;
8591       end_byte = SBYTES (start);
8592     }
8593   else
8594     {
8595       CHECK_NUMBER_COERCE_MARKER (start);
8596       CHECK_NUMBER_COERCE_MARKER (end);
8597       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8598         args_out_of_range (start, end);
8599       if (NILP (current_buffer->enable_multibyte_characters))
8600         return Qt;
8601       start_byte = CHAR_TO_BYTE (XINT (start));
8602       end_byte = CHAR_TO_BYTE (XINT (end));
           /* Equal character and byte lengths: return t without
              examining any coding system.  */
8603       if (XINT (end) - XINT (start) == end_byte - start_byte)
8604         return Qt;
8605
           /* The region is scanned below through plain pointers, so when
              the gap lies strictly inside it, move the gap to whichever
              end of the region is nearer.  */
8606       if (XINT (start) < GPT && XINT (end) > GPT)
8607         {
8608           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8609             move_gap_both (XINT (start), start_byte);
8610           else
8611             move_gap_both (XINT (end), end_byte);
8612         }
8613     }
8614
       /* Collect the attribute vectors of all candidates: entries of
          Vcoding_system_list that are not in EXCLUDE, that are their own
          base name, and whose type is not Qundecided.  The translation
          table for encoding is stored into each collected vector so that
          char_encodable_p can use it.  */
8615   coding_attrs_list = Qnil;
8616   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8617     if (NILP (exclude)
8618         || NILP (Fmemq (XCAR (tail), exclude)))
8619       {
8620         Lisp_Object attrs;
8621
8622         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8623         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8624             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8625           {
8626             ASET (attrs, coding_attr_trans_tbl,
8627                   get_translation_table (attrs, 1, NULL));
8628             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8629           }
8630       }
8631
8632   if (STRINGP (start))
8633     p = pbeg = SDATA (start);
8634   else
8635     p = pbeg = BYTE_POS_ADDR (start_byte);
8636   pend = p + (end_byte - start_byte);
8637
       /* Trim the ASCII bytes at both ends of the text.  */
8638   while (p < pend && ASCII_BYTE_P (*p)) p++;
8639   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8640
       /* WORK_TABLE records the characters that have been checked
          already, so that each distinct character is tested once.  */
8641   work_table = Fmake_char_table (Qnil, Qnil);
8642   while (p < pend)
8643     {
8644       if (ASCII_BYTE_P (*p))
8645         p++;
8646       else
8647         {
8648           c = STRING_CHAR_ADVANCE (p);
8649           if (!NILP (char_table_ref (work_table, c)))
8650             /* This character was already checked.  Ignore it.  */
8651             continue;
8652
8653           charset_map_loaded = 0;
               /* Drop from CODING_ATTRS_LIST every candidate that cannot
                  encode C.  A cell is deleted in place by copying its
                  successor into it; the last cell cannot be deleted that
                  way, so its car is set to nil instead, and such cells
                  are skipped from then on.  */
8654           for (tail = coding_attrs_list; CONSP (tail);)
8655             {
8656               elt = XCAR (tail);
8657               if (NILP (elt))
8658                 tail = XCDR (tail);
8659               else if (char_encodable_p (c, elt))
8660                 tail = XCDR (tail);
8661               else if (CONSP (XCDR (tail)))
8662                 {
8663                   XSETCAR (tail, XCAR (XCDR (tail)));
8664                   XSETCDR (tail, XCDR (XCDR (tail)));
8665                 }
8666               else
8667                 {
8668                   XSETCAR (tail, Qnil);
8669                   tail = XCDR (tail);
8670                 }
8671             }
               /* charset_map_loaded was reset above; if it is set now,
                  recompute the scan pointers from their offsets
                  (presumably because loading a charset map can relocate
                  string or buffer text -- TODO confirm).  */
8672           if (charset_map_loaded)
8673             {
8674               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8675
8676               if (STRINGP (start))
8677                 pbeg = SDATA (start);
8678               else
8679                 pbeg = BYTE_POS_ADDR (start_byte);
8680               p = pbeg + p_offset;
8681               pend = pbeg + pend_offset;
8682             }
8683           char_table_set (work_table, c, Qt);
8684         }
8685     }
8686
       /* Qraw_text and Qno_conversion are always part of the result;
          the base names of the surviving candidates are consed in front
          of them.  */
8687   safe_codings = list2 (Qraw_text, Qno_conversion);
8688   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8689     if (! NILP (XCAR (tail)))
8690       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8691
8692   return safe_codings;
8693 }
8694
8695
8696 DEFUN ("unencodable-char-position", Funencodable_char_position,
8697        Sunencodable_char_position, 3, 5, 0,
8698        doc: /*
8699 Return position of first un-encodable character in a region.
8700 START and END specify the region and CODING-SYSTEM specifies the
8701 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8702
8703 If optional 4th argument COUNT is non-nil, it specifies at most how
8704 many un-encodable characters to search.  In this case, the value is a
8705 list of positions.
8706
8707 If optional 5th argument STRING is non-nil, it is a string to search
8708 for un-encodable characters.  In that case, START and END are indexes
8709 to the string.  */)
8710   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
8711 {
       /* N, FROM and TO receive XINT values (COUNT and positions), so
          they are EMACS_INT to avoid truncation where EMACS_INT is
          wider than int.  */
8712   EMACS_INT n;
8713   struct coding_system coding;
8714   Lisp_Object attrs, charset_list, translation_table;
8715   Lisp_Object positions;
8716   EMACS_INT from, to;
8717   const unsigned char *p, *stop, *pend;
8718   int ascii_compatible;
8719
8720   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8721   attrs = CODING_ID_ATTRS (coding.id);
       /* A coding system of type Qraw_text is reported as able to encode
          everything.  */
8722   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8723     return Qnil;
8724   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8725   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8726   translation_table = get_translation_table (attrs, 1, NULL);
8727
8728   if (NILP (string))
8729     {
8730       validate_region (&start, &end);
8731       from = XINT (start);
8732       to = XINT (end);
           /* Return nil right away for a unibyte buffer, and for an
              ASCII-compatible coding system when the character and byte
              lengths of the region agree.  */
8733       if (NILP (current_buffer->enable_multibyte_characters)
8734           || (ascii_compatible
8735               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8736         return Qnil;
8737       p = CHAR_POS_ADDR (from);
8738       pend = CHAR_POS_ADDR (to);
           /* When the gap lies inside the region, scan up to the gap
              first; the loop below continues after the gap.  */
8739       if (from < GPT && to >= GPT)
8740         stop = GPT_ADDR;
8741       else
8742         stop = pend;
8743     }
8744   else
8745     {
8746       CHECK_STRING (string);
8747       CHECK_NATNUM (start);
8748       CHECK_NATNUM (end);
8749       from = XINT (start);
8750       to = XINT (end);
8751       if (from > to
8752           || to > SCHARS (string))
8753         args_out_of_range_3 (string, start, end);
8754       if (! STRING_MULTIBYTE (string))
8755         return Qnil;
8756       p = SDATA (string) + string_char_to_byte (string, from);
8757       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8758       if (ascii_compatible && (to - from) == (pend - p))
8759         return Qnil;
8760     }
8761
       /* Without COUNT only the first position is looked for.
          NOTE(review): a COUNT of 0 makes N go negative on the first
          hit, so it never compares equal to 0 and every un-encodable
          position is collected -- confirm that this is intended.  */
8762   if (NILP (count))
8763     n = 1;
8764   else
8765     {
8766       CHECK_NATNUM (count);
8767       n = XINT (count);
8768     }
8769
8770   positions = Qnil;
       /* From here on FROM is the character position that corresponds
          to P.  */
8771   while (1)
8772     {
8773       int c;
8774
8775       if (ascii_compatible)
8776         while (p < stop && ASCII_BYTE_P (*p))
8777           p++, from++;
8778       if (p >= stop)
8779         {
8780           if (p >= pend)
8781             break;
               /* The gap was reached: resume right after it and scan up
                  to the real end.  */
8782           stop = pend;
8783           p = GAP_END_ADDR;
8784         }
8785
8786       c = STRING_CHAR_ADVANCE (p);
8787       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8788           && ! char_charset (translate_char (translation_table, c),
8789                              charset_list, NULL))
8790         {
8791           positions = Fcons (make_number (from), positions);
8792           n--;
8793           if (n == 0)
8794             break;
8795         }
8796
8797       from++;
8798     }
8799
       /* POSITIONS was built in reverse order.  Without COUNT it holds at
          most one element, which is returned directly (nil when
          empty).  */
8800   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8801 }
8802
8803
8804 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8805        Scheck_coding_systems_region, 3, 3, 0,
8806        doc: /* Check if the region is encodable by coding systems.
8807
8808 START and END are buffer positions specifying the region.
8809 CODING-SYSTEM-LIST is a list of coding systems to check.
8810
8811 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8812 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8813 whole region, POS0, POS1, ... are buffer positions where non-encodable
8814 characters are found.
8815
8816 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8817 value is nil.
8818
8819 START may be a string.  In that case, check if the string is
8820 encodable, and the value contains indices to the string instead of
8821 buffer positions.  END is ignored.
8822
8823 If the current buffer (or START if it is a string) is unibyte, the value
8824 is nil.  */)
8825   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system_list)
8826 {
8827   Lisp_Object list;
8828   EMACS_INT start_byte, end_byte;
8829   int pos;
8830   const unsigned char *p, *pbeg, *pend;
8831   int c;
8832   Lisp_Object tail, elt, attrs;
8833
8834   if (STRINGP (start))
8835     {
8836       if (!STRING_MULTIBYTE (start)
8837           || SCHARS (start) == SBYTES (start))
8838         return Qnil;
8839       start_byte = 0;
8840       end_byte = SBYTES (start);
8841       pos = 0;
8842     }
8843   else
8844     {
8845       CHECK_NUMBER_COERCE_MARKER (start);
8846       CHECK_NUMBER_COERCE_MARKER (end);
8847       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8848         args_out_of_range (start, end);
8849       if (NILP (current_buffer->enable_multibyte_characters))
8850         return Qnil;
8851       start_byte = CHAR_TO_BYTE (XINT (start));
8852       end_byte = CHAR_TO_BYTE (XINT (end));
8853       if (XINT (end) - XINT (start) == end_byte - start_byte)
8854         return Qnil;
8855
8856       if (XINT (start) < GPT && XINT (end) > GPT)
8857         {
8858           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8859             move_gap_both (XINT (start), start_byte);
8860           else
8861             move_gap_both (XINT (end), end_byte);
8862         }
8863       pos = XINT (start);
8864     }
8865
8866   list = Qnil;
8867   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8868     {
8869       elt = XCAR (tail);
8870       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8871       ASET (attrs, coding_attr_trans_tbl,
8872             get_translation_table (attrs, 1, NULL));
8873       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8874     }
8875
8876   if (STRINGP (start))
8877     p = pbeg = SDATA (start);
8878   else
8879     p = pbeg = BYTE_POS_ADDR (start_byte);
8880   pend = p + (end_byte - start_byte);
8881
8882   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8883   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8884
8885   while (p < pend)
8886     {
8887       if (ASCII_BYTE_P (*p))
8888         p++;
8889       else
8890         {
8891           c = STRING_CHAR_ADVANCE (p);
8892
8893           charset_map_loaded = 0;
8894           for (tail = list; CONSP (tail); tail = XCDR (tail))
8895             {
8896               elt = XCDR (XCAR (tail));
8897               if (! char_encodable_p (c, XCAR (elt)))
8898                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8899             }
8900           if (charset_map_loaded)
8901             {
8902               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8903
8904               if (STRINGP (start))
8905                 pbeg = SDATA (start);
8906               else
8907                 pbeg = BYTE_POS_ADDR (start_byte);
8908               p = pbeg + p_offset;
8909               pend = pbeg + pend_offset;
8910             }
8911         }
8912       pos++;
8913     }
8914
8915   tail = list;
8916   list = Qnil;
8917   for (; CONSP (tail); tail = XCDR (tail))
8918     {
8919       elt = XCAR (tail);
8920       if (CONSP (XCDR (XCDR (elt))))
8921         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8922                       list);
8923     }
8924
8925   return list;
8926 }
8927
8928
8929 Lisp_Object
8930 code_convert_region (Lisp_Object start, Lisp_Object end,
8931                      Lisp_Object coding_system, Lisp_Object dst_object,
8932                      int encodep, int norecord)
8933 {
8934   struct coding_system coding;
8935   EMACS_INT from, from_byte, to, to_byte;
8936   Lisp_Object src_object;
8937
8938   CHECK_NUMBER_COERCE_MARKER (start);
8939   CHECK_NUMBER_COERCE_MARKER (end);
8940   if (NILP (coding_system))
8941     coding_system = Qno_conversion;
8942   else
8943     CHECK_CODING_SYSTEM (coding_system);
8944   src_object = Fcurrent_buffer ();
8945   if (NILP (dst_object))
8946     dst_object = src_object;
8947   else if (! EQ (dst_object, Qt))
8948     CHECK_BUFFER (dst_object);
8949
8950   validate_region (&start, &end);
8951   from = XFASTINT (start);
8952   from_byte = CHAR_TO_BYTE (from);
8953   to = XFASTINT (end);
8954   to_byte = CHAR_TO_BYTE (to);
8955
8956   setup_coding_system (coding_system, &coding);
8957   coding.mode |= CODING_MODE_LAST_BLOCK;
8958
8959   if (encodep)
8960     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8961                           dst_object);
8962   else
8963     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8964                           dst_object);
8965   if (! norecord)
8966     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8967
8968   return (BUFFERP (dst_object)
8969           ? make_number (coding.produced_char)
8970           : coding.dst_object);
8971 }
8972
8973
8974 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8975        3, 4, "r\nzCoding system: ",
8976        doc: /* Decode the current region from the specified coding system.
8977 When called from a program, takes four arguments:
8978         START, END, CODING-SYSTEM, and DESTINATION.
8979 START and END are buffer positions.
8980
8981 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8982 If nil, the region between START and END is replaced by the decoded text.
8983 If buffer, the decoded text is inserted in that buffer after point (point
8984 does not move).
8985 In those cases, the length of the decoded text is returned.
8986 If DESTINATION is t, the decoded text is returned.
8987
8988 This function sets `last-coding-system-used' to the precise coding system
8989 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8990 not fully specified.)  */)
8991   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
8992 {
8993   return code_convert_region (start, end, coding_system, destination, 0, 0);
8994 }
8995
8996 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8997        3, 4, "r\nzCoding system: ",
8998        doc: /* Encode the current region by specified coding system.
8999 When called from a program, takes four arguments:
9000         START, END, CODING-SYSTEM and DESTINATION.
9001 START and END are buffer positions.
9002
9003 Optional 4th arguments DESTINATION specifies where the encoded text goes.
9004 If nil, the region between START and END is replace by the encoded text.
9005 If buffer, the encoded text is inserted in that buffer after point (point
9006 does not move).
9007 In those cases, the length of the encoded text is returned.
9008 If DESTINATION is t, the encoded text is returned.
9009
9010 This function sets `last-coding-system-used' to the precise coding system
9011 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9012 not fully specified.)  */)
9013   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
9014 {
9015   return code_convert_region (start, end, coding_system, destination, 1, 0);
9016 }
9017
9018 Lisp_Object
9019 code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9020                      Lisp_Object dst_object, int encodep, int nocopy, int norecord)
9021 {
9022   struct coding_system coding;
9023   EMACS_INT chars, bytes;
9024
9025   CHECK_STRING (string);
9026   if (NILP (coding_system))
9027     {
9028       if (! norecord)
9029         Vlast_coding_system_used = Qno_conversion;
9030       if (NILP (dst_object))
9031         return (nocopy ? Fcopy_sequence (string) : string);
9032     }
9033
9034   if (NILP (coding_system))
9035     coding_system = Qno_conversion;
9036   else
9037     CHECK_CODING_SYSTEM (coding_system);
9038   if (NILP (dst_object))
9039     dst_object = Qt;
9040   else if (! EQ (dst_object, Qt))
9041     CHECK_BUFFER (dst_object);
9042
9043   setup_coding_system (coding_system, &coding);
9044   coding.mode |= CODING_MODE_LAST_BLOCK;
9045   chars = SCHARS (string);
9046   bytes = SBYTES (string);
9047   if (encodep)
9048     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9049   else
9050     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9051   if (! norecord)
9052     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9053
9054   return (BUFFERP (dst_object)
9055           ? make_number (coding.produced_char)
9056           : coding.dst_object);
9057 }
9058
9059
9060 /* Encode or decode STRING according to CODING_SYSTEM.
9061    Do not set Vlast_coding_system_used.
9062
9063    This function is called only from macros DECODE_FILE and
9064    ENCODE_FILE, thus we ignore character composition.  */
9065
9066 Lisp_Object
9067 code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9068                               int encodep)
9069 {
9070   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9071 }
9072
9073
9074 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9075        2, 4, 0,
9076        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9077
9078 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9079 if the decoding operation is trivial.
9080
9081 Optional fourth arg BUFFER non-nil means that the decoded text is
9082 inserted in that buffer after point (point does not move).  In this
9083 case, the return value is the length of the decoded text.
9084
9085 This function sets `last-coding-system-used' to the precise coding system
9086 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9087 not fully specified.)  */)
9088   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9089 {
9090   return code_convert_string (string, coding_system, buffer,
9091                               0, ! NILP (nocopy), 0);
9092 }
9093
9094 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9095        2, 4, 0,
9096        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9097
9098 Optional third arg NOCOPY non-nil means it is OK to return STRING
9099 itself if the encoding operation is trivial.
9100
9101 Optional fourth arg BUFFER non-nil means that the encoded text is
9102 inserted in that buffer after point (point does not move).  In this
9103 case, the return value is the length of the encoded text.
9104
9105 This function sets `last-coding-system-used' to the precise coding system
9106 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9107 not fully specified.)  */)
9108   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9109 {
9110   return code_convert_string (string, coding_system, buffer,
9111                               1, ! NILP (nocopy), 1);
9112 }
9113
9114 \f
9115 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9116        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9117 Return the corresponding character.  */)
9118   (Lisp_Object code)
9119 {
9120   Lisp_Object spec, attrs, val;
9121   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9122   int c;
9123
9124   CHECK_NATNUM (code);
9125   c = XFASTINT (code);
9126   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9127   attrs = AREF (spec, 0);
9128
9129   if (ASCII_BYTE_P (c)
9130       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9131     return code;
9132
9133   val = CODING_ATTR_CHARSET_LIST (attrs);
9134   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9135   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9136   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9137
9138   if (c <= 0x7F)
9139     charset = charset_roman;
9140   else if (c >= 0xA0 && c < 0xDF)
9141     {
9142       charset = charset_kana;
9143       c -= 0x80;
9144     }
9145   else
9146     {
9147       int s1 = c >> 8, s2 = c & 0xFF;
9148
9149       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9150           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9151         error ("Invalid code: %d", code);
9152       SJIS_TO_JIS (c);
9153       charset = charset_kanji;
9154     }
9155   c = DECODE_CHAR (charset, c);
9156   if (c < 0)
9157     error ("Invalid code: %d", code);
9158   return make_number (c);
9159 }
9160
9161
9162 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9163        doc: /* Encode a Japanese character CH to shift_jis encoding.
9164 Return the corresponding code in SJIS.  */)
9165   (Lisp_Object ch)
9166 {
9167   Lisp_Object spec, attrs, charset_list;
9168   int c;
9169   struct charset *charset;
9170   unsigned code;
9171
9172   CHECK_CHARACTER (ch);
9173   c = XFASTINT (ch);
9174   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9175   attrs = AREF (spec, 0);
9176
9177   if (ASCII_CHAR_P (c)
9178       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9179     return ch;
9180
9181   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9182   charset = char_charset (c, charset_list, &code);
9183   if (code == CHARSET_INVALID_CODE (charset))
9184     error ("Can't encode by shift_jis encoding: %d", c);
9185   JIS_TO_SJIS (code);
9186
9187   return make_number (code);
9188 }
9189
9190 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9191        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9192 Return the corresponding character.  */)
9193   (Lisp_Object code)
9194 {
9195   Lisp_Object spec, attrs, val;
9196   struct charset *charset_roman, *charset_big5, *charset;
9197   int c;
9198
9199   CHECK_NATNUM (code);
9200   c = XFASTINT (code);
9201   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9202   attrs = AREF (spec, 0);
9203
9204   if (ASCII_BYTE_P (c)
9205       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9206     return code;
9207
9208   val = CODING_ATTR_CHARSET_LIST (attrs);
9209   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9210   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9211
9212   if (c <= 0x7F)
9213     charset = charset_roman;
9214   else
9215     {
9216       int b1 = c >> 8, b2 = c & 0x7F;
9217       if (b1 < 0xA1 || b1 > 0xFE
9218           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9219         error ("Invalid code: %d", code);
9220       charset = charset_big5;
9221     }
9222   c = DECODE_CHAR (charset, (unsigned )c);
9223   if (c < 0)
9224     error ("Invalid code: %d", code);
9225   return make_number (c);
9226 }
9227
9228 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9229        doc: /* Encode the Big5 character CH to BIG5 coding system.
9230 Return the corresponding character code in Big5.  */)
9231   (Lisp_Object ch)
9232 {
9233   Lisp_Object spec, attrs, charset_list;
9234   struct charset *charset;
9235   int c;
9236   unsigned code;
9237
9238   CHECK_CHARACTER (ch);
9239   c = XFASTINT (ch);
9240   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9241   attrs = AREF (spec, 0);
9242   if (ASCII_CHAR_P (c)
9243       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9244     return ch;
9245
9246   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9247   charset = char_charset (c, charset_list, &code);
9248   if (code == CHARSET_INVALID_CODE (charset))
9249     error ("Can't encode by Big5 encoding: %d", c);
9250
9251   return make_number (code);
9252 }
9253
9254 \f
9255 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9256        Sset_terminal_coding_system_internal, 1, 2, 0,
9257        doc: /* Internal use only.  */)
9258   (Lisp_Object coding_system, Lisp_Object terminal)
9259 {
9260   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9261   CHECK_SYMBOL (coding_system);
9262   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9263   /* We had better not send unsafe characters to terminal.  */
9264   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9265   /* Characer composition should be disabled.  */
9266   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9267   terminal_coding->src_multibyte = 1;
9268   terminal_coding->dst_multibyte = 0;
9269   return Qnil;
9270 }
9271
9272 DEFUN ("set-safe-terminal-coding-system-internal",
9273        Fset_safe_terminal_coding_system_internal,
9274        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9275        doc: /* Internal use only.  */)
9276   (Lisp_Object coding_system)
9277 {
9278   CHECK_SYMBOL (coding_system);
9279   setup_coding_system (Fcheck_coding_system (coding_system),
9280                        &safe_terminal_coding);
9281   /* Characer composition should be disabled.  */
9282   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9283   safe_terminal_coding.src_multibyte = 1;
9284   safe_terminal_coding.dst_multibyte = 0;
9285   return Qnil;
9286 }
9287
9288 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9289        Sterminal_coding_system, 0, 1, 0,
9290        doc: /* Return coding system specified for terminal output on the given terminal.
9291 TERMINAL may be a terminal object, a frame, or nil for the selected
9292 frame's terminal device.  */)
9293   (Lisp_Object terminal)
9294 {
9295   struct coding_system *terminal_coding
9296     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9297   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9298
9299   /* For backward compatibility, return nil if it is `undecided'. */
9300   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9301 }
9302
9303 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9304        Sset_keyboard_coding_system_internal, 1, 2, 0,
9305        doc: /* Internal use only.  */)
9306   (Lisp_Object coding_system, Lisp_Object terminal)
9307 {
9308   struct terminal *t = get_terminal (terminal, 1);
9309   CHECK_SYMBOL (coding_system);
9310   if (NILP (coding_system))
9311     coding_system = Qno_conversion;
9312   else
9313     Fcheck_coding_system (coding_system);
9314   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9315   /* Characer composition should be disabled.  */
9316   TERMINAL_KEYBOARD_CODING (t)->common_flags
9317     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9318   return Qnil;
9319 }
9320
9321 DEFUN ("keyboard-coding-system",
9322        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9323        doc: /* Return coding system specified for decoding keyboard input.  */)
9324   (Lisp_Object terminal)
9325 {
9326   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9327                          (get_terminal (terminal, 1))->id);
9328 }
9329
9330 \f
9331 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9332        Sfind_operation_coding_system,  1, MANY, 0,
9333        doc: /* Choose a coding system for an operation based on the target name.
9334 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9335 DECODING-SYSTEM is the coding system to use for decoding
9336 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9337 for encoding (in case OPERATION does encoding).
9338
9339 The first argument OPERATION specifies an I/O primitive:
9340   For file I/O, `insert-file-contents' or `write-region'.
9341   For process I/O, `call-process', `call-process-region', or `start-process'.
9342   For network I/O, `open-network-stream'.
9343
9344 The remaining arguments should be the same arguments that were passed
9345 to the primitive.  Depending on which primitive, one of those arguments
9346 is selected as the TARGET.  For example, if OPERATION does file I/O,
9347 whichever argument specifies the file name is TARGET.
9348
9349 TARGET has a meaning which depends on OPERATION:
9350   For file I/O, TARGET is a file name (except for the special case below).
9351   For process I/O, TARGET is a process name.
9352   For network I/O, TARGET is a service name or a port number.
9353
9354 This function looks up what is specified for TARGET in
9355 `file-coding-system-alist', `process-coding-system-alist',
9356 or `network-coding-system-alist' depending on OPERATION.
9357 They may specify a coding system, a cons of coding systems,
9358 or a function symbol to call.
9359 In the last case, we call the function with one argument,
9360 which is a list of all the arguments given to this function.
9361 If the function can't decide a coding system, it can return
9362 `undecided' so that the normal code-detection is performed.
9363
9364 If OPERATION is `insert-file-contents', the argument corresponding to
9365 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9366 file name to look up, and BUFFER is a buffer that contains the file's
9367 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9368 function to call for FILENAME, that function should examine the
9369 contents of BUFFER instead of reading the file.
9370
9371 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9372   (int nargs, Lisp_Object *args)
9373 {
9374   Lisp_Object operation, target_idx, target, val;
9375   register Lisp_Object chain;
9376
9377   if (nargs < 2)
9378     error ("Too few arguments");
9379   operation = args[0];
9380   if (!SYMBOLP (operation)
9381       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9382     error ("Invalid first argument");
9383   if (nargs < 1 + XINT (target_idx))
9384     error ("Too few arguments for operation: %s",
9385            SDATA (SYMBOL_NAME (operation)));
9386   target = args[XINT (target_idx) + 1];
9387   if (!(STRINGP (target)
9388         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9389             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9390         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9391     error ("Invalid %dth argument", XINT (target_idx) + 1);
9392   if (CONSP (target))
9393     target = XCAR (target);
9394
9395   chain = ((EQ (operation, Qinsert_file_contents)
9396             || EQ (operation, Qwrite_region))
9397            ? Vfile_coding_system_alist
9398            : (EQ (operation, Qopen_network_stream)
9399               ? Vnetwork_coding_system_alist
9400               : Vprocess_coding_system_alist));
9401   if (NILP (chain))
9402     return Qnil;
9403
9404   for (; CONSP (chain); chain = XCDR (chain))
9405     {
9406       Lisp_Object elt;
9407
9408       elt = XCAR (chain);
9409       if (CONSP (elt)
9410           && ((STRINGP (target)
9411                && STRINGP (XCAR (elt))
9412                && fast_string_match (XCAR (elt), target) >= 0)
9413               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9414         {
9415           val = XCDR (elt);
9416           /* Here, if VAL is both a valid coding system and a valid
9417              function symbol, we return VAL as a coding system.  */
9418           if (CONSP (val))
9419             return val;
9420           if (! SYMBOLP (val))
9421             return Qnil;
9422           if (! NILP (Fcoding_system_p (val)))
9423             return Fcons (val, val);
9424           if (! NILP (Ffboundp (val)))
9425             {
9426               /* We use call1 rather than safe_call1
9427                  so as to get bug reports about functions called here
9428                  which don't handle the current interface.  */
9429               val = call1 (val, Flist (nargs, args));
9430               if (CONSP (val))
9431                 return val;
9432               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9433                 return Fcons (val, val);
9434             }
9435           return Qnil;
9436         }
9437     }
9438   return Qnil;
9439 }
9440
9441 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9442        Sset_coding_system_priority, 0, MANY, 0,
9443        doc: /* Assign higher priority to the coding systems given as arguments.
9444 If multiple coding systems belong to the same category,
9445 all but the first one are ignored.
9446
9447 usage: (set-coding-system-priority &rest coding-systems)  */)
9448   (int nargs, Lisp_Object *args)
9449 {
9450   int i, j;
9451   int changed[coding_category_max];
9452   enum coding_category priorities[coding_category_max];
9453
9454   memset (changed, 0, sizeof changed);
9455
9456   for (i = j = 0; i < nargs; i++)
9457     {
9458       enum coding_category category;
9459       Lisp_Object spec, attrs;
9460
9461       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9462       attrs = AREF (spec, 0);
9463       category = XINT (CODING_ATTR_CATEGORY (attrs));
9464       if (changed[category])
9465         /* Ignore this coding system because a coding system of the
9466            same category already had a higher priority.  */
9467         continue;
9468       changed[category] = 1;
9469       priorities[j++] = category;
9470       if (coding_categories[category].id >= 0
9471           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9472         setup_coding_system (args[i], &coding_categories[category]);
9473       Fset (AREF (Vcoding_category_table, category), args[i]);
9474     }
9475
9476   /* Now we have decided top J priorities.  Reflect the order of the
9477      original priorities to the remaining priorities.  */
9478
9479   for (i = j, j = 0; i < coding_category_max; i++, j++)
9480     {
9481       while (j < coding_category_max
9482              && changed[coding_priorities[j]])
9483         j++;
9484       if (j == coding_category_max)
9485         abort ();
9486       priorities[i] = coding_priorities[j];
9487     }
9488
9489   memcpy (coding_priorities, priorities, sizeof priorities);
9490
9491   /* Update `coding-category-list'.  */
9492   Vcoding_category_list = Qnil;
9493   for (i = coding_category_max - 1; i >= 0; i--)
9494     Vcoding_category_list
9495       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9496                Vcoding_category_list);
9497
9498   return Qnil;
9499 }
9500
9501 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9502        Scoding_system_priority_list, 0, 1, 0,
9503        doc: /* Return a list of coding systems ordered by their priorities.
9504 The list contains a subset of coding systems; i.e. coding systems
9505 assigned to each coding category (see `coding-category-list').
9506
9507 HIGHESTP non-nil means just return the highest priority one.  */)
9508   (Lisp_Object highestp)
9509 {
9510   int i;
9511   Lisp_Object val;
9512
9513   for (i = 0, val = Qnil; i < coding_category_max; i++)
9514     {
9515       enum coding_category category = coding_priorities[i];
9516       int id = coding_categories[category].id;
9517       Lisp_Object attrs;
9518
9519       if (id < 0)
9520         continue;
9521       attrs = CODING_ID_ATTRS (id);
9522       if (! NILP (highestp))
9523         return CODING_ATTR_BASE_NAME (attrs);
9524       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9525     }
9526   return Fnreverse (val);
9527 }
9528
9529 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9530
9531 static Lisp_Object
9532 make_subsidiaries (Lisp_Object base)
9533 {
9534   Lisp_Object subsidiaries;
9535   int base_name_len = SBYTES (SYMBOL_NAME (base));
9536   char *buf = (char *) alloca (base_name_len + 6);
9537   int i;
9538
9539   memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
9540   subsidiaries = Fmake_vector (make_number (3), Qnil);
9541   for (i = 0; i < 3; i++)
9542     {
9543       memcpy (buf + base_name_len, suffixes[i], strlen (suffixes[i]) + 1);
9544       ASET (subsidiaries, i, intern (buf));
9545     }
9546   return subsidiaries;
9547 }
9548
9549
9550 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9551        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9552        doc: /* For internal use only.
9553 usage: (define-coding-system-internal ...)  */)
9554   (int nargs, Lisp_Object *args)
9555 {
9556   Lisp_Object name;
9557   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9558   Lisp_Object attrs;            /* Vector of attributes.  */
9559   Lisp_Object eol_type;
9560   Lisp_Object aliases;
9561   Lisp_Object coding_type, charset_list, safe_charsets;
9562   enum coding_category category;
9563   Lisp_Object tail, val;
9564   int max_charset_id = 0;
9565   int i;
9566
9567   if (nargs < coding_arg_max)
9568     goto short_args;
9569
9570   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9571
9572   name = args[coding_arg_name];
9573   CHECK_SYMBOL (name);
9574   CODING_ATTR_BASE_NAME (attrs) = name;
9575
9576   val = args[coding_arg_mnemonic];
9577   if (! STRINGP (val))
9578     CHECK_CHARACTER (val);
9579   CODING_ATTR_MNEMONIC (attrs) = val;
9580
9581   coding_type = args[coding_arg_coding_type];
9582   CHECK_SYMBOL (coding_type);
9583   CODING_ATTR_TYPE (attrs) = coding_type;
9584
9585   charset_list = args[coding_arg_charset_list];
9586   if (SYMBOLP (charset_list))
9587     {
9588       if (EQ (charset_list, Qiso_2022))
9589         {
9590           if (! EQ (coding_type, Qiso_2022))
9591             error ("Invalid charset-list");
9592           charset_list = Viso_2022_charset_list;
9593         }
9594       else if (EQ (charset_list, Qemacs_mule))
9595         {
9596           if (! EQ (coding_type, Qemacs_mule))
9597             error ("Invalid charset-list");
9598           charset_list = Vemacs_mule_charset_list;
9599         }
9600       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9601         if (max_charset_id < XFASTINT (XCAR (tail)))
9602           max_charset_id = XFASTINT (XCAR (tail));
9603     }
9604   else
9605     {
9606       charset_list = Fcopy_sequence (charset_list);
9607       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9608         {
9609           struct charset *charset;
9610
9611           val = XCAR (tail);
9612           CHECK_CHARSET_GET_CHARSET (val, charset);
9613           if (EQ (coding_type, Qiso_2022)
9614               ? CHARSET_ISO_FINAL (charset) < 0
9615               : EQ (coding_type, Qemacs_mule)
9616               ? CHARSET_EMACS_MULE_ID (charset) < 0
9617               : 0)
9618             error ("Can't handle charset `%s'",
9619                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9620
9621           XSETCAR (tail, make_number (charset->id));
9622           if (max_charset_id < charset->id)
9623             max_charset_id = charset->id;
9624         }
9625     }
9626   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9627
9628   safe_charsets = make_uninit_string (max_charset_id + 1);
9629   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9630   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9631     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9632   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9633
9634   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9635
9636   val = args[coding_arg_decode_translation_table];
9637   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9638     CHECK_SYMBOL (val);
9639   CODING_ATTR_DECODE_TBL (attrs) = val;
9640
9641   val = args[coding_arg_encode_translation_table];
9642   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9643     CHECK_SYMBOL (val);
9644   CODING_ATTR_ENCODE_TBL (attrs) = val;
9645
9646   val = args[coding_arg_post_read_conversion];
9647   CHECK_SYMBOL (val);
9648   CODING_ATTR_POST_READ (attrs) = val;
9649
9650   val = args[coding_arg_pre_write_conversion];
9651   CHECK_SYMBOL (val);
9652   CODING_ATTR_PRE_WRITE (attrs) = val;
9653
9654   val = args[coding_arg_default_char];
9655   if (NILP (val))
9656     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9657   else
9658     {
9659       CHECK_CHARACTER (val);
9660       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9661     }
9662
9663   val = args[coding_arg_for_unibyte];
9664   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9665
9666   val = args[coding_arg_plist];
9667   CHECK_LIST (val);
9668   CODING_ATTR_PLIST (attrs) = val;
9669
9670   if (EQ (coding_type, Qcharset))
9671     {
9672       /* Generate a lisp vector of 256 elements.  Each element is nil,
9673          integer, or a list of charset IDs.
9674
9675          If Nth element is nil, the byte code N is invalid in this
9676          coding system.
9677
9678          If Nth element is a number NUM, N is the first byte of a
9679          charset whose ID is NUM.
9680
9681          If Nth element is a list of charset IDs, N is the first byte
9682          of one of them.  The list is sorted by dimensions of the
9683          charsets.  A charset of smaller dimension comes first.  */
9684       val = Fmake_vector (make_number (256), Qnil);
9685
9686       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9687         {
9688           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9689           int dim = CHARSET_DIMENSION (charset);
9690           int idx = (dim - 1) * 4;
9691
9692           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9693             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9694
9695           for (i = charset->code_space[idx];
9696                i <= charset->code_space[idx + 1]; i++)
9697             {
9698               Lisp_Object tmp, tmp2;
9699               int dim2;
9700
9701               tmp = AREF (val, i);
9702               if (NILP (tmp))
9703                 tmp = XCAR (tail);
9704               else if (NUMBERP (tmp))
9705                 {
9706                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9707                   if (dim < dim2)
9708                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9709                   else
9710                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9711                 }
9712               else
9713                 {
9714                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9715                     {
9716                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9717                       if (dim < dim2)
9718                         break;
9719                     }
9720                   if (NILP (tmp2))
9721                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9722                   else
9723                     {
9724                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9725                       XSETCAR (tmp2, XCAR (tail));
9726                     }
9727                 }
9728               ASET (val, i, tmp);
9729             }
9730         }
9731       ASET (attrs, coding_attr_charset_valids, val);
9732       category = coding_category_charset;
9733     }
9734   else if (EQ (coding_type, Qccl))
9735     {
9736       Lisp_Object valids;
9737
9738       if (nargs < coding_arg_ccl_max)
9739         goto short_args;
9740
9741       val = args[coding_arg_ccl_decoder];
9742       CHECK_CCL_PROGRAM (val);
9743       if (VECTORP (val))
9744         val = Fcopy_sequence (val);
9745       ASET (attrs, coding_attr_ccl_decoder, val);
9746
9747       val = args[coding_arg_ccl_encoder];
9748       CHECK_CCL_PROGRAM (val);
9749       if (VECTORP (val))
9750         val = Fcopy_sequence (val);
9751       ASET (attrs, coding_attr_ccl_encoder, val);
9752
9753       val = args[coding_arg_ccl_valids];
9754       valids = Fmake_string (make_number (256), make_number (0));
9755       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9756         {
9757           int from, to;
9758
9759           val = Fcar (tail);
9760           if (INTEGERP (val))
9761             {
9762               from = to = XINT (val);
9763               if (from < 0 || from > 255)
9764                 args_out_of_range_3 (val, make_number (0), make_number (255));
9765             }
9766           else
9767             {
9768               CHECK_CONS (val);
9769               CHECK_NATNUM_CAR (val);
9770               CHECK_NATNUM_CDR (val);
9771               from = XINT (XCAR (val));
9772               if (from > 255)
9773                 args_out_of_range_3 (XCAR (val),
9774                                      make_number (0), make_number (255));
9775               to = XINT (XCDR (val));
9776               if (to < from || to > 255)
9777                 args_out_of_range_3 (XCDR (val),
9778                                      XCAR (val), make_number (255));
9779             }
9780           for (i = from; i <= to; i++)
9781             SSET (valids, i, 1);
9782         }
9783       ASET (attrs, coding_attr_ccl_valids, valids);
9784
9785       category = coding_category_ccl;
9786     }
9787   else if (EQ (coding_type, Qutf_16))
9788     {
9789       Lisp_Object bom, endian;
9790
9791       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9792
9793       if (nargs < coding_arg_utf16_max)
9794         goto short_args;
9795
9796       bom = args[coding_arg_utf16_bom];
9797       if (! NILP (bom) && ! EQ (bom, Qt))
9798         {
9799           CHECK_CONS (bom);
9800           val = XCAR (bom);
9801           CHECK_CODING_SYSTEM (val);
9802           val = XCDR (bom);
9803           CHECK_CODING_SYSTEM (val);
9804         }
9805       ASET (attrs, coding_attr_utf_bom, bom);
9806
9807       endian = args[coding_arg_utf16_endian];
9808       CHECK_SYMBOL (endian);
9809       if (NILP (endian))
9810         endian = Qbig;
9811       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9812         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9813       ASET (attrs, coding_attr_utf_16_endian, endian);
9814
9815       category = (CONSP (bom)
9816                   ? coding_category_utf_16_auto
9817                   : NILP (bom)
9818                   ? (EQ (endian, Qbig)
9819                      ? coding_category_utf_16_be_nosig
9820                      : coding_category_utf_16_le_nosig)
9821                   : (EQ (endian, Qbig)
9822                      ? coding_category_utf_16_be
9823                      : coding_category_utf_16_le));
9824     }
9825   else if (EQ (coding_type, Qiso_2022))
9826     {
9827       Lisp_Object initial, reg_usage, request, flags;
9828       int i;
9829
9830       if (nargs < coding_arg_iso2022_max)
9831         goto short_args;
9832
9833       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9834       CHECK_VECTOR (initial);
9835       for (i = 0; i < 4; i++)
9836         {
9837           val = Faref (initial, make_number (i));
9838           if (! NILP (val))
9839             {
9840               struct charset *charset;
9841
9842               CHECK_CHARSET_GET_CHARSET (val, charset);
9843               ASET (initial, i, make_number (CHARSET_ID (charset)));
9844               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9845                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9846             }
9847           else
9848             ASET (initial, i, make_number (-1));
9849         }
9850
9851       reg_usage = args[coding_arg_iso2022_reg_usage];
9852       CHECK_CONS (reg_usage);
9853       CHECK_NUMBER_CAR (reg_usage);
9854       CHECK_NUMBER_CDR (reg_usage);
9855
9856       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9857       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9858         {
9859           int id;
9860           Lisp_Object tmp;
9861
9862           val = Fcar (tail);
9863           CHECK_CONS (val);
9864           tmp = XCAR (val);
9865           CHECK_CHARSET_GET_ID (tmp, id);
9866           CHECK_NATNUM_CDR (val);
9867           if (XINT (XCDR (val)) >= 4)
9868             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9869           XSETCAR (val, make_number (id));
9870         }
9871
9872       flags = args[coding_arg_iso2022_flags];
9873       CHECK_NATNUM (flags);
9874       i = XINT (flags);
9875       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9876         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9877
9878       ASET (attrs, coding_attr_iso_initial, initial);
9879       ASET (attrs, coding_attr_iso_usage, reg_usage);
9880       ASET (attrs, coding_attr_iso_request, request);
9881       ASET (attrs, coding_attr_iso_flags, flags);
9882       setup_iso_safe_charsets (attrs);
9883
9884       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9885         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9886                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9887                     ? coding_category_iso_7_else
9888                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9889                     ? coding_category_iso_7
9890                     : coding_category_iso_7_tight);
9891       else
9892         {
9893           int id = XINT (AREF (initial, 1));
9894
9895           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9896                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9897                        || id < 0)
9898                       ? coding_category_iso_8_else
9899                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9900                       ? coding_category_iso_8_1
9901                       : coding_category_iso_8_2);
9902         }
9903       if (category != coding_category_iso_8_1
9904           && category != coding_category_iso_8_2)
9905         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9906     }
9907   else if (EQ (coding_type, Qemacs_mule))
9908     {
9909       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9910         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9911       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9912       category = coding_category_emacs_mule;
9913     }
9914   else if (EQ (coding_type, Qshift_jis))
9915     {
9916
9917       struct charset *charset;
9918
9919       if (XINT (Flength (charset_list)) != 3
9920           && XINT (Flength (charset_list)) != 4)
9921         error ("There should be three or four charsets");
9922
9923       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9924       if (CHARSET_DIMENSION (charset) != 1)
9925         error ("Dimension of charset %s is not one",
9926                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9927       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9928         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9929
9930       charset_list = XCDR (charset_list);
9931       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9932       if (CHARSET_DIMENSION (charset) != 1)
9933         error ("Dimension of charset %s is not one",
9934                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9935
9936       charset_list = XCDR (charset_list);
9937       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9938       if (CHARSET_DIMENSION (charset) != 2)
9939         error ("Dimension of charset %s is not two",
9940                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9941
9942       charset_list = XCDR (charset_list);
9943       if (! NILP (charset_list))
9944         {
9945           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9946           if (CHARSET_DIMENSION (charset) != 2)
9947             error ("Dimension of charset %s is not two",
9948                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9949         }
9950
9951       category = coding_category_sjis;
9952       Vsjis_coding_system = name;
9953     }
9954   else if (EQ (coding_type, Qbig5))
9955     {
9956       struct charset *charset;
9957
9958       if (XINT (Flength (charset_list)) != 2)
9959         error ("There should be just two charsets");
9960
9961       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9962       if (CHARSET_DIMENSION (charset) != 1)
9963         error ("Dimension of charset %s is not one",
9964                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9965       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9966         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9967
9968       charset_list = XCDR (charset_list);
9969       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9970       if (CHARSET_DIMENSION (charset) != 2)
9971         error ("Dimension of charset %s is not two",
9972                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9973
9974       category = coding_category_big5;
9975       Vbig5_coding_system = name;
9976     }
9977   else if (EQ (coding_type, Qraw_text))
9978     {
9979       category = coding_category_raw_text;
9980       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9981     }
9982   else if (EQ (coding_type, Qutf_8))
9983     {
9984       Lisp_Object bom;
9985
9986       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9987
9988       if (nargs < coding_arg_utf8_max)
9989         goto short_args;
9990
9991       bom = args[coding_arg_utf8_bom];
9992       if (! NILP (bom) && ! EQ (bom, Qt))
9993         {
9994           CHECK_CONS (bom);
9995           val = XCAR (bom);
9996           CHECK_CODING_SYSTEM (val);
9997           val = XCDR (bom);
9998           CHECK_CODING_SYSTEM (val);
9999         }
10000       ASET (attrs, coding_attr_utf_bom, bom);
10001
10002       category = (CONSP (bom) ? coding_category_utf_8_auto
10003                   : NILP (bom) ? coding_category_utf_8_nosig
10004                   : coding_category_utf_8_sig);
10005     }
10006   else if (EQ (coding_type, Qundecided))
10007     category = coding_category_undecided;
10008   else
10009     error ("Invalid coding system type: %s",
10010            SDATA (SYMBOL_NAME (coding_type)));
10011
10012   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10013   CODING_ATTR_PLIST (attrs)
10014     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10015                                 CODING_ATTR_PLIST (attrs)));
10016   CODING_ATTR_PLIST (attrs)
10017     = Fcons (QCascii_compatible_p,
10018              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10019                     CODING_ATTR_PLIST (attrs)));
10020
10021   eol_type = args[coding_arg_eol_type];
10022   if (! NILP (eol_type)
10023       && ! EQ (eol_type, Qunix)
10024       && ! EQ (eol_type, Qdos)
10025       && ! EQ (eol_type, Qmac))
10026     error ("Invalid eol-type");
10027
10028   aliases = Fcons (name, Qnil);
10029
10030   if (NILP (eol_type))
10031     {
10032       eol_type = make_subsidiaries (name);
10033       for (i = 0; i < 3; i++)
10034         {
10035           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10036
10037           this_name = AREF (eol_type, i);
10038           this_aliases = Fcons (this_name, Qnil);
10039           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10040           this_spec = Fmake_vector (make_number (3), attrs);
10041           ASET (this_spec, 1, this_aliases);
10042           ASET (this_spec, 2, this_eol_type);
10043           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10044           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10045           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10046           if (NILP (val))
10047             Vcoding_system_alist
10048               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10049                        Vcoding_system_alist);
10050         }
10051     }
10052
10053   spec_vec = Fmake_vector (make_number (3), attrs);
10054   ASET (spec_vec, 1, aliases);
10055   ASET (spec_vec, 2, eol_type);
10056
10057   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10058   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10059   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10060   if (NILP (val))
10061     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10062                                   Vcoding_system_alist);
10063
10064   {
10065     int id = coding_categories[category].id;
10066
10067     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10068       setup_coding_system (name, &coding_categories[category]);
10069   }
10070
10071   return Qnil;
10072
10073  short_args:
10074   return Fsignal (Qwrong_number_of_arguments,
10075                   Fcons (intern ("define-coding-system-internal"),
10076                          make_number (nargs)));
10077 }
10078
10079
10080 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10081        3, 3, 0,
10082        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10083   (Lisp_Object coding_system, Lisp_Object prop, Lisp_Object val)
10084 {
10085   Lisp_Object spec, attrs;
10086
10087   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10088   attrs = AREF (spec, 0);
10089   if (EQ (prop, QCmnemonic))
10090     {
10091       if (! STRINGP (val))
10092         CHECK_CHARACTER (val);
10093       CODING_ATTR_MNEMONIC (attrs) = val;
10094     }
10095   else if (EQ (prop, QCdefault_char))
10096     {
10097       if (NILP (val))
10098         val = make_number (' ');
10099       else
10100         CHECK_CHARACTER (val);
10101       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10102     }
10103   else if (EQ (prop, QCdecode_translation_table))
10104     {
10105       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10106         CHECK_SYMBOL (val);
10107       CODING_ATTR_DECODE_TBL (attrs) = val;
10108     }
10109   else if (EQ (prop, QCencode_translation_table))
10110     {
10111       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10112         CHECK_SYMBOL (val);
10113       CODING_ATTR_ENCODE_TBL (attrs) = val;
10114     }
10115   else if (EQ (prop, QCpost_read_conversion))
10116     {
10117       CHECK_SYMBOL (val);
10118       CODING_ATTR_POST_READ (attrs) = val;
10119     }
10120   else if (EQ (prop, QCpre_write_conversion))
10121     {
10122       CHECK_SYMBOL (val);
10123       CODING_ATTR_PRE_WRITE (attrs) = val;
10124     }
10125   else if (EQ (prop, QCascii_compatible_p))
10126     {
10127       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10128     }
10129
10130   CODING_ATTR_PLIST (attrs)
10131     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10132   return val;
10133 }
10134
10135
10136 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10137        Sdefine_coding_system_alias, 2, 2, 0,
10138        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10139   (Lisp_Object alias, Lisp_Object coding_system)
10140 {
10141   Lisp_Object spec, aliases, eol_type, val;
10142
10143   CHECK_SYMBOL (alias);
10144   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10145   aliases = AREF (spec, 1);
10146   /* ALIASES should be a list of length more than zero, and the first
10147      element is a base coding system.  Append ALIAS at the tail of the
10148      list.  */
10149   while (!NILP (XCDR (aliases)))
10150     aliases = XCDR (aliases);
10151   XSETCDR (aliases, Fcons (alias, Qnil));
10152
10153   eol_type = AREF (spec, 2);
10154   if (VECTORP (eol_type))
10155     {
10156       Lisp_Object subsidiaries;
10157       int i;
10158
10159       subsidiaries = make_subsidiaries (alias);
10160       for (i = 0; i < 3; i++)
10161         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10162                                      AREF (eol_type, i));
10163     }
10164
10165   Fputhash (alias, spec, Vcoding_system_hash_table);
10166   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10167   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10168   if (NILP (val))
10169     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10170                                   Vcoding_system_alist);
10171
10172   return Qnil;
10173 }
10174
10175 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10176        1, 1, 0,
10177        doc: /* Return the base of CODING-SYSTEM.
10178 Any alias or subsidiary coding system is not a base coding system.  */)
10179   (Lisp_Object coding_system)
10180 {
10181   Lisp_Object spec, attrs;
10182
10183   if (NILP (coding_system))
10184     return (Qno_conversion);
10185   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10186   attrs = AREF (spec, 0);
10187   return CODING_ATTR_BASE_NAME (attrs);
10188 }
10189
10190 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10191        1, 1, 0,
10192        doc: "Return the property list of CODING-SYSTEM.")
10193   (Lisp_Object coding_system)
10194 {
10195   Lisp_Object spec, attrs;
10196
10197   if (NILP (coding_system))
10198     coding_system = Qno_conversion;
10199   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10200   attrs = AREF (spec, 0);
10201   return CODING_ATTR_PLIST (attrs);
10202 }
10203
10204
10205 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10206        1, 1, 0,
10207        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10208   (Lisp_Object coding_system)
10209 {
10210   Lisp_Object spec;
10211
10212   if (NILP (coding_system))
10213     coding_system = Qno_conversion;
10214   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10215   return AREF (spec, 1);
10216 }
10217
10218 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10219        Scoding_system_eol_type, 1, 1, 0,
10220        doc: /* Return eol-type of CODING-SYSTEM.
10221 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10222
10223 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10224 and CR respectively.
10225
10226 A vector value indicates that a format of end-of-line should be
10227 detected automatically.  Nth element of the vector is the subsidiary
10228 coding system whose eol-type is N.  */)
10229   (Lisp_Object coding_system)
10230 {
10231   Lisp_Object spec, eol_type;
10232   int n;
10233
10234   if (NILP (coding_system))
10235     coding_system = Qno_conversion;
10236   if (! CODING_SYSTEM_P (coding_system))
10237     return Qnil;
10238   spec = CODING_SYSTEM_SPEC (coding_system);
10239   eol_type = AREF (spec, 2);
10240   if (VECTORP (eol_type))
10241     return Fcopy_sequence (eol_type);
10242   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10243   return make_number (n);
10244 }
10245
10246 #endif /* emacs */
10247
10248 \f
10249 /*** 12. Postamble ***/
10250
10251 void
10252 init_coding_once (void)
10253 {
10254   int i;
10255
10256   for (i = 0; i < coding_category_max; i++)
10257     {
10258       coding_categories[i].id = -1;
10259       coding_priorities[i] = i;
10260     }
10261
10262   /* ISO2022 specific initialize routine.  */
10263   for (i = 0; i < 0x20; i++)
10264     iso_code_class[i] = ISO_control_0;
10265   for (i = 0x21; i < 0x7F; i++)
10266     iso_code_class[i] = ISO_graphic_plane_0;
10267   for (i = 0x80; i < 0xA0; i++)
10268     iso_code_class[i] = ISO_control_1;
10269   for (i = 0xA1; i < 0xFF; i++)
10270     iso_code_class[i] = ISO_graphic_plane_1;
10271   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10272   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10273   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10274   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10275   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10276   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10277   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10278   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10279   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10280
10281   for (i = 0; i < 256; i++)
10282     {
10283       emacs_mule_bytes[i] = 1;
10284     }
10285   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10286   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10287   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10288   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10289 }
10290
10291 #ifdef emacs
10292
10293 void
10294 syms_of_coding (void)
10295 {
10296   staticpro (&Vcoding_system_hash_table);
10297   {
10298     Lisp_Object args[2];
10299     args[0] = QCtest;
10300     args[1] = Qeq;
10301     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10302   }
10303
10304   staticpro (&Vsjis_coding_system);
10305   Vsjis_coding_system = Qnil;
10306
10307   staticpro (&Vbig5_coding_system);
10308   Vbig5_coding_system = Qnil;
10309
10310   staticpro (&Vcode_conversion_reused_workbuf);
10311   Vcode_conversion_reused_workbuf = Qnil;
10312
10313   staticpro (&Vcode_conversion_workbuf_name);
10314   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10315
10316   reused_workbuf_in_use = 0;
10317
10318   DEFSYM (Qcharset, "charset");
10319   DEFSYM (Qtarget_idx, "target-idx");
10320   DEFSYM (Qcoding_system_history, "coding-system-history");
10321   Fset (Qcoding_system_history, Qnil);
10322
10323   /* Target FILENAME is the first argument.  */
10324   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10325   /* Target FILENAME is the third argument.  */
10326   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10327
10328   DEFSYM (Qcall_process, "call-process");
10329   /* Target PROGRAM is the first argument.  */
10330   Fput (Qcall_process, Qtarget_idx, make_number (0));
10331
10332   DEFSYM (Qcall_process_region, "call-process-region");
10333   /* Target PROGRAM is the third argument.  */
10334   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10335
10336   DEFSYM (Qstart_process, "start-process");
10337   /* Target PROGRAM is the third argument.  */
10338   Fput (Qstart_process, Qtarget_idx, make_number (2));
10339
10340   DEFSYM (Qopen_network_stream, "open-network-stream");
10341   /* Target SERVICE is the fourth argument.  */
10342   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10343
10344   DEFSYM (Qcoding_system, "coding-system");
10345   DEFSYM (Qcoding_aliases, "coding-aliases");
10346
10347   DEFSYM (Qeol_type, "eol-type");
10348   DEFSYM (Qunix, "unix");
10349   DEFSYM (Qdos, "dos");
10350
10351   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10352   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10353   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10354   DEFSYM (Qdefault_char, "default-char");
10355   DEFSYM (Qundecided, "undecided");
10356   DEFSYM (Qno_conversion, "no-conversion");
10357   DEFSYM (Qraw_text, "raw-text");
10358
10359   DEFSYM (Qiso_2022, "iso-2022");
10360
10361   DEFSYM (Qutf_8, "utf-8");
10362   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10363
10364   DEFSYM (Qutf_16, "utf-16");
10365   DEFSYM (Qbig, "big");
10366   DEFSYM (Qlittle, "little");
10367
10368   DEFSYM (Qshift_jis, "shift-jis");
10369   DEFSYM (Qbig5, "big5");
10370
10371   DEFSYM (Qcoding_system_p, "coding-system-p");
10372
10373   DEFSYM (Qcoding_system_error, "coding-system-error");
10374   Fput (Qcoding_system_error, Qerror_conditions,
10375         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10376   Fput (Qcoding_system_error, Qerror_message,
10377         make_pure_c_string ("Invalid coding system"));
10378
10379   /* Intern this now in case it isn't already done.
10380      Setting this variable twice is harmless.
10381      But don't staticpro it here--that is done in alloc.c.  */
10382   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10383
10384   DEFSYM (Qtranslation_table, "translation-table");
10385   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10386   DEFSYM (Qtranslation_table_id, "translation-table-id");
10387   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10388   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10389
10390   DEFSYM (Qvalid_codes, "valid-codes");
10391
10392   DEFSYM (Qemacs_mule, "emacs-mule");
10393
10394   DEFSYM (QCcategory, ":category");
10395   DEFSYM (QCmnemonic, ":mnemonic");
10396   DEFSYM (QCdefault_char, ":default-char");
10397   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10398   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10399   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10400   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10401   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10402
10403   Vcoding_category_table
10404     = Fmake_vector (make_number (coding_category_max), Qnil);
10405   staticpro (&Vcoding_category_table);
10406   /* The following are targets of code detection.  */
10407   ASET (Vcoding_category_table, coding_category_iso_7,
10408         intern_c_string ("coding-category-iso-7"));
10409   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10410         intern_c_string ("coding-category-iso-7-tight"));
10411   ASET (Vcoding_category_table, coding_category_iso_8_1,
10412         intern_c_string ("coding-category-iso-8-1"));
10413   ASET (Vcoding_category_table, coding_category_iso_8_2,
10414         intern_c_string ("coding-category-iso-8-2"));
10415   ASET (Vcoding_category_table, coding_category_iso_7_else,
10416         intern_c_string ("coding-category-iso-7-else"));
10417   ASET (Vcoding_category_table, coding_category_iso_8_else,
10418         intern_c_string ("coding-category-iso-8-else"));
10419   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10420         intern_c_string ("coding-category-utf-8-auto"));
10421   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10422         intern_c_string ("coding-category-utf-8"));
10423   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10424         intern_c_string ("coding-category-utf-8-sig"));
10425   ASET (Vcoding_category_table, coding_category_utf_16_be,
10426         intern_c_string ("coding-category-utf-16-be"));
10427   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10428         intern_c_string ("coding-category-utf-16-auto"));
10429   ASET (Vcoding_category_table, coding_category_utf_16_le,
10430         intern_c_string ("coding-category-utf-16-le"));
10431   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10432         intern_c_string ("coding-category-utf-16-be-nosig"));
10433   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10434         intern_c_string ("coding-category-utf-16-le-nosig"));
10435   ASET (Vcoding_category_table, coding_category_charset,
10436         intern_c_string ("coding-category-charset"));
10437   ASET (Vcoding_category_table, coding_category_sjis,
10438         intern_c_string ("coding-category-sjis"));
10439   ASET (Vcoding_category_table, coding_category_big5,
10440         intern_c_string ("coding-category-big5"));
10441   ASET (Vcoding_category_table, coding_category_ccl,
10442         intern_c_string ("coding-category-ccl"));
10443   ASET (Vcoding_category_table, coding_category_emacs_mule,
10444         intern_c_string ("coding-category-emacs-mule"));
10445   /* Followings are NOT target of code detection.  */
10446   ASET (Vcoding_category_table, coding_category_raw_text,
10447         intern_c_string ("coding-category-raw-text"));
10448   ASET (Vcoding_category_table, coding_category_undecided,
10449         intern_c_string ("coding-category-undecided"));
10450
10451   DEFSYM (Qinsufficient_source, "insufficient-source");
10452   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10453   DEFSYM (Qinvalid_source, "invalid-source");
10454   DEFSYM (Qinterrupted, "interrupted");
10455   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10456   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10457
10458   defsubr (&Scoding_system_p);
10459   defsubr (&Sread_coding_system);
10460   defsubr (&Sread_non_nil_coding_system);
10461   defsubr (&Scheck_coding_system);
10462   defsubr (&Sdetect_coding_region);
10463   defsubr (&Sdetect_coding_string);
10464   defsubr (&Sfind_coding_systems_region_internal);
10465   defsubr (&Sunencodable_char_position);
10466   defsubr (&Scheck_coding_systems_region);
10467   defsubr (&Sdecode_coding_region);
10468   defsubr (&Sencode_coding_region);
10469   defsubr (&Sdecode_coding_string);
10470   defsubr (&Sencode_coding_string);
10471   defsubr (&Sdecode_sjis_char);
10472   defsubr (&Sencode_sjis_char);
10473   defsubr (&Sdecode_big5_char);
10474   defsubr (&Sencode_big5_char);
10475   defsubr (&Sset_terminal_coding_system_internal);
10476   defsubr (&Sset_safe_terminal_coding_system_internal);
10477   defsubr (&Sterminal_coding_system);
10478   defsubr (&Sset_keyboard_coding_system_internal);
10479   defsubr (&Skeyboard_coding_system);
10480   defsubr (&Sfind_operation_coding_system);
10481   defsubr (&Sset_coding_system_priority);
10482   defsubr (&Sdefine_coding_system_internal);
10483   defsubr (&Sdefine_coding_system_alias);
10484   defsubr (&Scoding_system_put);
10485   defsubr (&Scoding_system_base);
10486   defsubr (&Scoding_system_plist);
10487   defsubr (&Scoding_system_aliases);
10488   defsubr (&Scoding_system_eol_type);
10489   defsubr (&Scoding_system_priority_list);
10490
10491   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10492                doc: /* List of coding systems.
10493
10494 Do not alter the value of this variable manually.  This variable should be
10495 updated by the functions `define-coding-system' and
10496 `define-coding-system-alias'.  */);
10497   Vcoding_system_list = Qnil;
10498
10499   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10500                doc: /* Alist of coding system names.
10501 Each element is one element list of coding system name.
10502 This variable is given to `completing-read' as COLLECTION argument.
10503
10504 Do not alter the value of this variable manually.  This variable should be
10505 updated by the functions `make-coding-system' and
10506 `define-coding-system-alias'.  */);
10507   Vcoding_system_alist = Qnil;
10508
10509   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10510                doc: /* List of coding-categories (symbols) ordered by priority.
10511
10512 On detecting a coding system, Emacs tries code detection algorithms
10513 associated with each coding-category one by one in this order.  When
10514 one algorithm agrees with a byte sequence of source text, the coding
10515 system bound to the corresponding coding-category is selected.
10516
10517 Don't modify this variable directly, but use `set-coding-priority'.  */);
10518   {
10519     int i;
10520
10521     Vcoding_category_list = Qnil;
10522     for (i = coding_category_max - 1; i >= 0; i--)
10523       Vcoding_category_list
10524         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10525                  Vcoding_category_list);
10526   }
10527
10528   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10529                doc: /* Specify the coding system for read operations.
10530 It is useful to bind this variable with `let', but do not set it globally.
10531 If the value is a coding system, it is used for decoding on read operation.
10532 If not, an appropriate element is used from one of the coding system alists.
10533 There are three such tables: `file-coding-system-alist',
10534 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10535   Vcoding_system_for_read = Qnil;
10536
10537   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10538                doc: /* Specify the coding system for write operations.
10539 Programs bind this variable with `let', but you should not set it globally.
10540 If the value is a coding system, it is used for encoding of output,
10541 when writing it to a file and when sending it to a file or subprocess.
10542
10543 If this does not specify a coding system, an appropriate element
10544 is used from one of the coding system alists.
10545 There are three such tables: `file-coding-system-alist',
10546 `process-coding-system-alist', and `network-coding-system-alist'.
10547 For output to files, if the above procedure does not specify a coding system,
10548 the value of `buffer-file-coding-system' is used.  */);
10549   Vcoding_system_for_write = Qnil;
10550
10551   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10552                doc: /*
10553 Coding system used in the latest file or process I/O.  */);
10554   Vlast_coding_system_used = Qnil;
10555
10556   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10557                doc: /*
10558 Error status of the last code conversion.
10559
10560 When an error was detected in the last code conversion, this variable
10561 is set to one of the following symbols.
10562   `insufficient-source'
10563   `inconsistent-eol'
10564   `invalid-source'
10565   `interrupted'
10566   `insufficient-memory'
10567 When no error was detected, the value doesn't change.  So, to check
10568 the error status of a code conversion by this variable, you must
10569 explicitly set this variable to nil before performing code
10570 conversion.  */);
10571   Vlast_code_conversion_error = Qnil;
10572
10573   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10574                doc: /*
10575 *Non-nil means always inhibit code conversion of end-of-line format.
10576 See info node `Coding Systems' and info node `Text and Binary' concerning
10577 such conversion.  */);
10578   inhibit_eol_conversion = 0;
10579
10580   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10581                doc: /*
10582 Non-nil means process buffer inherits coding system of process output.
10583 Bind it to t if the process output is to be treated as if it were a file
10584 read from some filesystem.  */);
10585   inherit_process_coding_system = 0;
10586
10587   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10588                doc: /*
10589 Alist to decide a coding system to use for a file I/O operation.
10590 The format is ((PATTERN . VAL) ...),
10591 where PATTERN is a regular expression matching a file name,
10592 VAL is a coding system, a cons of coding systems, or a function symbol.
10593 If VAL is a coding system, it is used for both decoding and encoding
10594 the file contents.
10595 If VAL is a cons of coding systems, the car part is used for decoding,
10596 and the cdr part is used for encoding.
10597 If VAL is a function symbol, the function must return a coding system
10598 or a cons of coding systems which are used as above.  The function is
10599 called with an argument that is a list of the arguments with which
10600 `find-operation-coding-system' was called.  If the function can't decide
10601 a coding system, it can return `undecided' so that the normal
10602 code-detection is performed.
10603
10604 See also the function `find-operation-coding-system'
10605 and the variable `auto-coding-alist'.  */);
10606   Vfile_coding_system_alist = Qnil;
10607
10608   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10609                doc: /*
10610 Alist to decide a coding system to use for a process I/O operation.
10611 The format is ((PATTERN . VAL) ...),
10612 where PATTERN is a regular expression matching a program name,
10613 VAL is a coding system, a cons of coding systems, or a function symbol.
10614 If VAL is a coding system, it is used for both decoding what received
10615 from the program and encoding what sent to the program.
10616 If VAL is a cons of coding systems, the car part is used for decoding,
10617 and the cdr part is used for encoding.
10618 If VAL is a function symbol, the function must return a coding system
10619 or a cons of coding systems which are used as above.
10620
10621 See also the function `find-operation-coding-system'.  */);
10622   Vprocess_coding_system_alist = Qnil;
10623
10624   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10625                doc: /*
10626 Alist to decide a coding system to use for a network I/O operation.
10627 The format is ((PATTERN . VAL) ...),
10628 where PATTERN is a regular expression matching a network service name
10629 or is a port number to connect to,
10630 VAL is a coding system, a cons of coding systems, or a function symbol.
10631 If VAL is a coding system, it is used for both decoding what received
10632 from the network stream and encoding what sent to the network stream.
10633 If VAL is a cons of coding systems, the car part is used for decoding,
10634 and the cdr part is used for encoding.
10635 If VAL is a function symbol, the function must return a coding system
10636 or a cons of coding systems which are used as above.
10637
10638 See also the function `find-operation-coding-system'.  */);
10639   Vnetwork_coding_system_alist = Qnil;
10640
10641   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10642                doc: /* Coding system to use with system messages.
10643 Also used for decoding keyboard input on X Window system.  */);
10644   Vlocale_coding_system = Qnil;
10645
10646   /* The eol mnemonics are reset in startup.el system-dependently.  */
10647   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10648                doc: /*
10649 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10650   eol_mnemonic_unix = make_pure_c_string (":");
10651
10652   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10653                doc: /*
10654 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10655   eol_mnemonic_dos = make_pure_c_string ("\\");
10656
10657   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10658                doc: /*
10659 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10660   eol_mnemonic_mac = make_pure_c_string ("/");
10661
10662   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10663                doc: /*
10664 *String displayed in mode line when end-of-line format is not yet determined.  */);
10665   eol_mnemonic_undecided = make_pure_c_string (":");
10666
10667   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10668                doc: /*
10669 *Non-nil enables character translation while encoding and decoding.  */);
10670   Venable_character_translation = Qt;
10671
10672   DEFVAR_LISP ("standard-translation-table-for-decode",
10673                &Vstandard_translation_table_for_decode,
10674                doc: /* Table for translating characters while decoding.  */);
10675   Vstandard_translation_table_for_decode = Qnil;
10676
10677   DEFVAR_LISP ("standard-translation-table-for-encode",
10678                &Vstandard_translation_table_for_encode,
10679                doc: /* Table for translating characters while encoding.  */);
10680   Vstandard_translation_table_for_encode = Qnil;
10681
10682   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10683                doc: /* Alist of charsets vs revision numbers.
10684 While encoding, if a charset (car part of an element) is found,
10685 designate it with the escape sequence identifying revision (cdr part
10686 of the element).  */);
10687   Vcharset_revision_table = Qnil;
10688
10689   DEFVAR_LISP ("default-process-coding-system",
10690                &Vdefault_process_coding_system,
10691                doc: /* Cons of coding systems used for process I/O by default.
10692 The car part is used for decoding a process output,
10693 the cdr part is used for encoding a text to be sent to a process.  */);
10694   Vdefault_process_coding_system = Qnil;
10695
10696   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10697                doc: /*
10698 Table of extra Latin codes in the range 128..159 (inclusive).
10699 This is a vector of length 256.
10700 If Nth element is non-nil, the existence of code N in a file
10701 \(or output of subprocess) doesn't prevent it to be detected as
10702 a coding system of ISO 2022 variant which has a flag
10703 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10704 or reading output of a subprocess.
10705 Only 128th through 159th elements have a meaning.  */);
10706   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10707
10708   DEFVAR_LISP ("select-safe-coding-system-function",
10709                &Vselect_safe_coding_system_function,
10710                doc: /*
10711 Function to call to select safe coding system for encoding a text.
10712
10713 If set, this function is called to force a user to select a proper
10714 coding system which can encode the text in the case that a default
10715 coding system used in each operation can't encode the text.  The
10716 function should take care that the buffer is not modified while
10717 the coding system is being selected.
10718
10719 The default value is `select-safe-coding-system' (which see).  */);
10720   Vselect_safe_coding_system_function = Qnil;
10721
10722   DEFVAR_BOOL ("coding-system-require-warning",
10723                &coding_system_require_warning,
10724                doc: /* Internal use only.
10725 If non-nil, on writing a file, `select-safe-coding-system-function' is
10726 called even if `coding-system-for-write' is non-nil.  The command
10727 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10728   coding_system_require_warning = 0;
10729
10730
10731   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10732                &inhibit_iso_escape_detection,
10733                doc: /*
10734 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10735
10736 When Emacs reads text, it tries to detect how the text is encoded.
10737 This code detection is sensitive to escape sequences.  If Emacs sees
10738 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10739 of the ISO2022 encodings, and decodes text by the corresponding coding
10740 system (e.g. `iso-2022-7bit').
10741
10742 However, there may be a case that you want to read escape sequences in
10743 a file as is.  In such a case, you can set this variable to non-nil.
10744 Then the code detection will ignore any escape sequences, and no text is
10745 detected as encoded in some ISO-2022 encoding.  The result is that all
10746 escape sequences become visible in a buffer.
10747
10748 The default value is nil, and it is strongly recommended not to change
10749 it.  That is because many Emacs Lisp source files that contain
10750 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10751 in Emacs's distribution, and they won't be decoded correctly on
10752 reading if you suppress escape sequence detection.
10753
10754 The other way to read escape sequences in a file without decoding is
10755 to explicitly specify some coding system that doesn't use ISO-2022
10756 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10757   inhibit_iso_escape_detection = 0;
10758
10759   DEFVAR_BOOL ("inhibit-null-byte-detection",
10760                &inhibit_null_byte_detection,
10761                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10762 By default, Emacs treats it as binary data, and does not attempt to
10763 decode it.  The effect is as if you specified `no-conversion' for
10764 reading that text.
10765
10766 Set this to non-nil when a regular text happens to include null bytes.
10767 Examples are Index nodes of Info files and null-byte delimited output
10768 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10769 decode text as usual.  */);
10770   inhibit_null_byte_detection = 0;
10771
10772   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10773                doc: /* Char table for translating self-inserting characters.
10774 This is applied to the result of input methods, not their input.
10775 See also `keyboard-translate-table'.
10776
10777 Use of this variable for character code unification was rendered
10778 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10779 internal character representation.  */);
10780     Vtranslation_table_for_input = Qnil;
10781
10782   {
10783     Lisp_Object args[coding_arg_max];
10784     Lisp_Object plist[16];
10785     int i;
10786
10787     for (i = 0; i < coding_arg_max; i++)
10788       args[i] = Qnil;
10789
10790     plist[0] = intern_c_string (":name");
10791     plist[1] = args[coding_arg_name] = Qno_conversion;
10792     plist[2] = intern_c_string (":mnemonic");
10793     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10794     plist[4] = intern_c_string (":coding-type");
10795     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10796     plist[6] = intern_c_string (":ascii-compatible-p");
10797     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10798     plist[8] = intern_c_string (":default-char");
10799     plist[9] = args[coding_arg_default_char] = make_number (0);
10800     plist[10] = intern_c_string (":for-unibyte");
10801     plist[11] = args[coding_arg_for_unibyte] = Qt;
10802     plist[12] = intern_c_string (":docstring");
10803     plist[13] = make_pure_c_string ("Do no conversion.\n\
10804 \n\
10805 When you visit a file with this coding, the file is read into a\n\
10806 unibyte buffer as is, thus each byte of a file is treated as a\n\
10807 character.");
10808     plist[14] = intern_c_string (":eol-type");
10809     plist[15] = args[coding_arg_eol_type] = Qunix;
10810     args[coding_arg_plist] = Flist (16, plist);
10811     Fdefine_coding_system_internal (coding_arg_max, args);
10812
10813     plist[1] = args[coding_arg_name] = Qundecided;
10814     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10815     plist[5] = args[coding_arg_coding_type] = Qundecided;
10816     /* This is already set.
10817        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10818     plist[8] = intern_c_string (":charset-list");
10819     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10820     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10821     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10822     plist[15] = args[coding_arg_eol_type] = Qnil;
10823     args[coding_arg_plist] = Flist (16, plist);
10824     Fdefine_coding_system_internal (coding_arg_max, args);
10825   }
10826
10827   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10828
10829   {
10830     int i;
10831
10832     for (i = 0; i < coding_category_max; i++)
10833       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10834   }
10835 #if defined (DOS_NT)
10836   system_eol_type = Qdos;
10837 #else
10838   system_eol_type = Qunix;
10839 #endif
10840   staticpro (&system_eol_type);
10841 }
10842
10843 char *
10844 emacs_strerror (int error_number)
10845 {
10846   char *str;
10847
10848   synchronize_system_messages_locale ();
10849   str = strerror (error_number);
10850
10851   if (! NILP (Vlocale_coding_system))
10852     {
10853       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10854                                                       Vlocale_coding_system,
10855                                                       0);
10856       str = (char *) SDATA (dec);
10857     }
10858
10859   return str;
10860 }
10861
10862 #endif /* emacs */
10863
10864 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10865    (do not change this comment) */