src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009, 2010
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  At
  60   the C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from coding system symbol to attributes vector is done by looking up
  63   Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we setup a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (struct coding_system *coding,
 158                    struct coding_detection_info *detect_info)
 159 {
 160   const unsigned char *src = coding->source;
 161   const unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the source is exhausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172       /* A non-conforming byte rejects XXX; strong evidence is remembered.  */
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (__C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source was exhausted without finding an invalid byte.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size.
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (struct coding_system *coding)
 206 {
 207   const unsigned char *src = coding->source + coding->consumed;
 208   const unsigned char *src_end = coding->source + coding->src_bytes;
 209   /* SRC_BASE remembers the start position in source in each loop.
 210      The loop will be exited when there's not enough source, or
 211      when there's no room in CHARBUF for a decoded character.  */
 212   const unsigned char *src_base;
 213   /* A buffer to produce decoded characters.  */
 214   int *charbuf = coding->charbuf + coding->charbuf_used;
 215   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 216   int multibytep = coding->src_multibyte;
 217
 218   while (1)
 219     {
 220       src_base = src;
 221       if (charbuf >= charbuf_end)
 222         /* No more room to produce a decoded character.  */
 223         break;
 224       ONE_MORE_BYTE (c);
 225       /* Decode C and store the resulting character(s) in CHARBUF.  */
 226     }
 227
 228  no_more_source:
 229   if (src_base < src_end
 230       && coding->mode & CODING_MODE_LAST_BLOCK)
 231     /* If the source ends by partial bytes to construct a character,
 232        treat them as eight-bit raw data.  */
 233     while (src_base < src_end && charbuf < charbuf_end)
 234       *charbuf++ = *src_base++;
 235   /* Remember how many bytes and characters we consumed.  If the
 236      source is multibyte, the bytes and chars are not identical.  */
 237   coding->consumed = coding->consumed_char = src_base - coding->source;
 238   /* Remember how many characters we produced.  */
 239   coding->charbuf_used = charbuf - coding->charbuf;
 240 }
 241 #endif
 242
 243 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 244
 245   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 246   internal multibyte format by CODING.  The resulting byte sequence
 247   goes to a place pointed to by DESTINATION, the length of which
 248   should not exceed DST_BYTES.
 249
 250   These functions set the information of original and encoded texts in
 251   the members produced, produced_char, consumed, and consumed_char of
 252   the structure *CODING.  They also set the member result to one of
 253   CODING_RESULT_XXX indicating how the encoding finished.
 254
 255   DST_BYTES zero means that source area and destination area are
 256   overlapped, which means that we can produce an encoded text until it
 257   reaches the head of the not-yet-encoded source text.
 258
 259   Below is a template of these functions.  */
 260 #if 0
 261 static void
 262 encode_coding_XXX (struct coding_system *coding)
 263 {
 264   int multibytep = coding->dst_multibyte;
 265   int *charbuf = coding->charbuf;
 266   int *charbuf_end = charbuf + coding->charbuf_used;
 267   unsigned char *dst = coding->destination + coding->produced;
 268   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 269   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 270   int produced_chars = 0;
 271   /* Loop only while DST is below ADJUSTED_DST_END, not DST_END.  */
 272   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 273     {
 274       int c = *charbuf;
 275       /* Encode C into DST, and increment DST.  */
 276     }
 277  label_no_more_destination:
 278   /* How many chars and bytes we produced.  */
 279   coding->produced_char += produced_chars;
 280   coding->produced = dst - coding->destination;
 281 }
 282 #endif
 283
 284 \f
 285 /*** 1. Preamble ***/
 286
 287 #include <config.h>
 288 #include <stdio.h>
 289 #include <setjmp.h>
 290
 291 #include "lisp.h"
 292 #include "buffer.h"
 293 #include "character.h"
 294 #include "charset.h"
 295 #include "ccl.h"
 296 #include "composite.h"
 297 #include "coding.h"
 298 #include "window.h"
 299 #include "frame.h"
 300 #include "termhooks.h"
 301 /* Presumably maps coding system symbols to attribute vectors -- confirm.  */
 302 Lisp_Object Vcoding_system_hash_table;
 303
 304 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 305 Lisp_Object Qunix, Qdos;
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 315 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 316 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 317 Lisp_Object QCascii_compatible_p;
 318
 319 Lisp_Object Qcall_process, Qcall_process_region;
 320 Lisp_Object Qstart_process, Qopen_network_stream;
 321 Lisp_Object Qtarget_idx;
 322
 323 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 324 Lisp_Object Qinterrupted, Qinsufficient_memory;
 325
 326 /* If a symbol has this property, evaluate the value to define the
 327    symbol as a coding system.  */
 328 static Lisp_Object Qcoding_system_define_form;
 329
 330 int coding_system_require_warning;
 331
 332 Lisp_Object Vselect_safe_coding_system_function;
 333
 334 /* Mnemonic string for each format of end-of-line.  */
 335 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 336 /* Mnemonic string to indicate that the format of end-of-line is not
 337    yet decided.  */
 338 Lisp_Object eol_mnemonic_undecided;
 339
 340 /* Format of end-of-line decided by system.  This is Qunix on
 341    Unix and Mac, Qdos on DOS/Windows.
 342    This has an effect only for external encoding (i.e. for output to
 343    file and process), not for in-buffer or Lisp string encoding.  */
 344 static Lisp_Object system_eol_type;
 345
 346 #ifdef emacs
 347
 348 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 349
 350 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 351
 352 /* Coding systems emacs-mule and raw-text are for converting only
 353    end-of-line format.  */
 354 Lisp_Object Qemacs_mule, Qraw_text;
 355 Lisp_Object Qutf_8_emacs;
 356
 357 /* Coding-systems are handed between Emacs Lisp programs and C internal
 358    routines by the following three variables.  */
 359 /* Coding-system for reading files and receiving data from process.  */
 360 Lisp_Object Vcoding_system_for_read;
 361 /* Coding-system for writing files and sending data to process.  */
 362 Lisp_Object Vcoding_system_for_write;
 363 /* Coding-system actually used in the latest I/O.  */
 364 Lisp_Object Vlast_coding_system_used;
 365 /* Set to non-nil when an error is detected during code conversion.  */
 366 Lisp_Object Vlast_code_conversion_error;
 367 /* A vector of length 256 which contains information about special
 368    Latin codes (especially for dealing with Microsoft codes).  */
 369 Lisp_Object Vlatin_extra_code_table;
 370
 371 /* Flag to inhibit code conversion of end-of-line format.  */
 372 int inhibit_eol_conversion;
 373
 374 /* Flag to inhibit ISO2022 escape sequence detection.  */
 375 int inhibit_iso_escape_detection;
 376
 377 /* Flag to inhibit detection of binary files through null bytes.  */
 378 int inhibit_null_byte_detection;
 379
 380 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 381 int inherit_process_coding_system;
 382
 383 /* Coding system to be used to encode text for terminal display when
 384    terminal coding system is nil.  */
 385 struct coding_system safe_terminal_coding;
 386
 387 Lisp_Object Vfile_coding_system_alist;
 388 Lisp_Object Vprocess_coding_system_alist;
 389 Lisp_Object Vnetwork_coding_system_alist;
 390
 391 Lisp_Object Vlocale_coding_system;
 392
 393 #endif /* emacs */
 394
 395 /* Flag telling whether to look up a translation table on character
 396    code conversion.  */
 397 Lisp_Object Venable_character_translation;
 398 /* Standard translation table to look up on decoding (reading).  */
 399 Lisp_Object Vstandard_translation_table_for_decode;
 400 /* Standard translation table to look up on encoding (writing).  */
 401 Lisp_Object Vstandard_translation_table_for_encode;
 402
 403 Lisp_Object Qtranslation_table;
 404 Lisp_Object Qtranslation_table_id;
 405 Lisp_Object Qtranslation_table_for_decode;
 406 Lisp_Object Qtranslation_table_for_encode;
 407
 408 /* Alist of charsets vs revision number.  */
 409 static Lisp_Object Vcharset_revision_table;
 410
 411 /* Default coding systems used for process I/O.  */
 412 Lisp_Object Vdefault_process_coding_system;
 413
 414 /* Char table for translating Quail and self-inserting input.  */
 415 Lisp_Object Vtranslation_table_for_input;
 416
 417 /* Two special coding systems.  */
 418 Lisp_Object Vsjis_coding_system;
 419 Lisp_Object Vbig5_coding_system;
 420
 421 /* ISO2022 section */
 422 /* XINT of element REG of CODING's coding_attr_iso_initial attribute.  */
 423 #define CODING_ISO_INITIAL(coding, reg)                 \
 424   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 425                      coding_attr_iso_initial),          \
 426                reg)))
 427 /* CODING->safe_charsets[CHARSET_ID], or -1 if CHARSET_ID is greater than
 428    CODING->max_charset_id or that entry is 255.  */
 429 #define CODING_ISO_REQUEST(coding, charset_id)          \
 430   (((charset_id) <= (coding)->max_charset_id            \
 431     ? ((coding)->safe_charsets[charset_id] != 255       \
 432        ? (coding)->safe_charsets[charset_id]            \
 433        : -1)                                            \
 434     : -1))
 435
 436 /* Accessors for members of CODING->spec.iso_2022.  */
 437 #define CODING_ISO_FLAGS(coding)        \
 438   ((coding)->spec.iso_2022.flags)
 439 #define CODING_ISO_DESIGNATION(coding, reg)     \
 440   ((coding)->spec.iso_2022.current_designation[reg])
 441 #define CODING_ISO_INVOCATION(coding, plane)    \
 442   ((coding)->spec.iso_2022.current_invocation[plane])
 443 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 444   ((coding)->spec.iso_2022.single_shifting)
 445 #define CODING_ISO_BOL(coding)  \
 446   ((coding)->spec.iso_2022.bol)
 447 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 448   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 449 #define CODING_ISO_CMP_STATUS(coding)   \
 450   (&(coding)->spec.iso_2022.cmp_status)
 451 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 452   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 453 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 454   ((coding)->spec.iso_2022.embedded_utf_8)
 455
 456 /* Control characters of ISO2022.  */
 457                         /* code */      /* function */
 458 #define ISO_CODE_LF     0x0A            /* line-feed */
 459 #define ISO_CODE_CR     0x0D            /* carriage-return */
 460 #define ISO_CODE_SO     0x0E            /* shift-out */
 461 #define ISO_CODE_SI     0x0F            /* shift-in */
 462 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 463 #define ISO_CODE_ESC    0x1B            /* escape */
 464 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 465 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 466 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 467
 468 /* Every 1-byte code of ISO2022 is classified into one of the
 469    following classes.  */
 470 enum iso_code_class_type
 471   {
 472     ISO_control_0,              /* Control codes in the range
 473                                    0x00..0x1F and 0x7F, except for the
 474                                    following 4 codes.  */
 475     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 476     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 477     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 478     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 479     ISO_control_1,              /* Control codes in the range
 480                                    0x80..0x9F, except for the
 481                                    following 3 codes.  */
 482     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 483     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 484     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 485     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 486     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 487     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 488     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 489   };
 490
 491 /** The macros CODING_ISO_FLAG_XXX define the flag bits of the
 492     `iso-flags' attribute of an iso2022 coding system.  */
 493
 494 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 495    instead of the correct short-form sequence (e.g. ESC $ A).  */
 496 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 497
 498 /* If set, reset graphic planes and registers at end-of-line to the
 499    initial state.  */
 500 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 501
 502 /* If set, reset graphic planes and registers before any control
 503    characters to the initial state.  */
 504 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 505
 506 /* If set, encode by 7-bit environment.  */
 507 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 508
 509 /* If set, use locking-shift function.  */
 510 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 511
 512 /* If set, use single-shift function.  Overwrite
 513    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 514 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 515
 516 /* If set, use designation escape sequence.  */
 517 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 518
 519 /* If set, produce revision number sequence.  */
 520 #define CODING_ISO_FLAG_REVISION        0x0080
 521
 522 /* If set, produce ISO6429's direction specifying sequence.  */
 523 #define CODING_ISO_FLAG_DIRECTION       0x0100
 524
 525 /* If set, assume designation states are reset at beginning of line on
 526    output.  */
 527 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 528
 529 /* If set, designation sequence should be placed at beginning of line
 530    on output.  */
 531 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 532
 533 /* If set, do not encode unsafe characters on output.  */
 534 #define CODING_ISO_FLAG_SAFE            0x0800
 535
 536 /* If set, extra latin codes (128..159) are accepted as a valid code
 537    on input.  */
 538 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 539
 540 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 541
 542 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 543
 544 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 545
 546 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 547 /* NOTE(review): bits 0x20000..0x80000 are not defined here.  */
 548 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 549
 550 /* A character to be produced on output if encoding of the original
 551    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 552 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 553
 554 /* UTF-8 section: accessor for the utf_8_bom member of CODING->spec.  */
 555 #define CODING_UTF_8_BOM(coding)        \
 556   ((coding)->spec.utf_8_bom)
 557
 558 /* UTF-16 section: accessors for members of CODING->spec.utf_16.  */
 559 #define CODING_UTF_16_BOM(coding)       \
 560   ((coding)->spec.utf_16.bom)
 561
 562 #define CODING_UTF_16_ENDIAN(coding)    \
 563   ((coding)->spec.utf_16.endian)
 564
 565 #define CODING_UTF_16_SURROGATE(coding) \
 566   ((coding)->spec.utf_16.surrogate)
 567
 568
 569 /* CCL section: attributes fetched from CODING_ID_ATTRS of CODING's id.  */
 570 #define CODING_CCL_DECODER(coding)      \
 571   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 572 #define CODING_CCL_ENCODER(coding)      \
 573   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 574 #define CODING_CCL_VALIDS(coding)                                          \
 575   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 576
 577 /* Index for each coding category in `coding_categories'.  A value is
 578    also the bit position of its CATEGORY_MASK_XXX, where one is defined.  */
 579 enum coding_category
 580   {
 581     coding_category_iso_7,
 582     coding_category_iso_7_tight,
 583     coding_category_iso_8_1,
 584     coding_category_iso_8_2,
 585     coding_category_iso_7_else,
 586     coding_category_iso_8_else,
 587     coding_category_utf_8_auto,
 588     coding_category_utf_8_nosig,
 589     coding_category_utf_8_sig,
 590     coding_category_utf_16_auto,
 591     coding_category_utf_16_be,
 592     coding_category_utf_16_le,
 593     coding_category_utf_16_be_nosig,
 594     coding_category_utf_16_le_nosig,
 595     coding_category_charset,
 596     coding_category_sjis,
 597     coding_category_big5,
 598     coding_category_ccl,
 599     coding_category_emacs_mule,
 600     /* All above are targets of code detection.  */
 601     coding_category_raw_text,
 602     coding_category_undecided,
 603     coding_category_max
 604   };
 605
 606 /* Flag bits used in detect_coding_XXXX; each is 1 << its coding_category.  */
 607 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 608 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 609 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 610 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 611 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 612 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 613 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 614 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 615 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 616 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 617 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 618 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 619 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 620 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 621 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 622 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 623 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 624 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 625 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 626 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 627
 628 /* This value is returned if detect_coding_mask () finds nothing other
 629    than ASCII characters.  CATEGORY_MASK_RAW_TEXT is not included.  */
 630 #define CATEGORY_MASK_ANY               \
 631   (CATEGORY_MASK_ISO_7                  \
 632    | CATEGORY_MASK_ISO_7_TIGHT          \
 633    | CATEGORY_MASK_ISO_8_1              \
 634    | CATEGORY_MASK_ISO_8_2              \
 635    | CATEGORY_MASK_ISO_7_ELSE           \
 636    | CATEGORY_MASK_ISO_8_ELSE           \
 637    | CATEGORY_MASK_UTF_8_AUTO           \
 638    | CATEGORY_MASK_UTF_8_NOSIG          \
 639    | CATEGORY_MASK_UTF_8_SIG            \
 640    | CATEGORY_MASK_UTF_16_AUTO          \
 641    | CATEGORY_MASK_UTF_16_BE            \
 642    | CATEGORY_MASK_UTF_16_LE            \
 643    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 644    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 645    | CATEGORY_MASK_CHARSET              \
 646    | CATEGORY_MASK_SJIS                 \
 647    | CATEGORY_MASK_BIG5                 \
 648    | CATEGORY_MASK_CCL                  \
 649    | CATEGORY_MASK_EMACS_MULE)
 650
 651 /* Unions of related category masks.  */
 652 #define CATEGORY_MASK_ISO_7BIT \
 653   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 654
 655 #define CATEGORY_MASK_ISO_8BIT \
 656   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 657
 658 #define CATEGORY_MASK_ISO_ELSE \
 659   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 660 /* Every ISO category except ISO_8_1 and ISO_8_2.  */
 661 #define CATEGORY_MASK_ISO_ESCAPE        \
 662   (CATEGORY_MASK_ISO_7                  \
 663    | CATEGORY_MASK_ISO_7_TIGHT          \
 664    | CATEGORY_MASK_ISO_7_ELSE           \
 665    | CATEGORY_MASK_ISO_8_ELSE)
 666
 667 #define CATEGORY_MASK_ISO       \
 668   (  CATEGORY_MASK_ISO_7BIT     \
 669      | CATEGORY_MASK_ISO_8BIT   \
 670      | CATEGORY_MASK_ISO_ELSE)
 671
 672 #define CATEGORY_MASK_UTF_16            \
 673   (CATEGORY_MASK_UTF_16_AUTO            \
 674    | CATEGORY_MASK_UTF_16_BE            \
 675    | CATEGORY_MASK_UTF_16_LE            \
 676    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 677    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 678
 679 #define CATEGORY_MASK_UTF_8     \
 680   (CATEGORY_MASK_UTF_8_AUTO     \
 681    | CATEGORY_MASK_UTF_8_NOSIG  \
 682    | CATEGORY_MASK_UTF_8_SIG)
 683
 684 /* List of symbols `coding-category-xxx' ordered by priority.  This
 685    variable is exposed to Emacs Lisp.  */
 686 static Lisp_Object Vcoding_category_list;
 687
 688 /* Table of coding categories (Lisp symbols).  This variable is for
 689    internal use only.  */
 690 static Lisp_Object Vcoding_category_table;
 691
 692 /* Table of coding-categories ordered by priority.  */
 693 static enum coding_category coding_priorities[coding_category_max];
 694
 695 /* Nth element is a coding context for the coding system bound to the
 696    Nth coding category.  */
 697 static struct coding_system coding_categories[coding_category_max];
 698
 699 /*** Commonly used macros and functions ***/
 700 /* Note: min and max evaluate one of their arguments twice.  */
 701 #ifndef min
 702 #define min(a, b) ((a) < (b) ? (a) : (b))
 703 #endif
 704 #ifndef max
 705 #define max(a, b) ((a) > (b) ? (a) : (b))
 706 #endif
 707 /* Set ATTRS from CODING's id and CHARSET_LIST from ATTRS.  */
 708 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 709   do {                                                  \
 710     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 711     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 712   } while (0)
 713
 714
 715 /* Safely get one byte from the source text pointed by SRC which ends
 716    at SRC_END, and set C to that byte.  If no byte is left, jump to
 717    `no_more_source', recording CODING_RESULT_INSUFFICIENT_SRC first when
 718    SRC_BASE < SRC.  If multibytep is nonzero and the byte is >= 0x80: a
 719    lead byte 0xC0 or 0xC1 yields ((lead & 1) << 6) | next byte; otherwise
 720    C is set to the negated character code read by string_char and
 721    CODING_RESULT_INVALID_SRC is recorded.  Needs these variables in scope:
 722    coding, src, src_base, src_end, multibytep, consumed_chars (incremented).  */
 723 #define ONE_MORE_BYTE(c)                                \
 724   do {                                                  \
 725     if (src == src_end)                                 \
 726       {                                                 \
 727         if (src_base < src)                             \
 728           record_conversion_result                      \
 729             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 730         goto no_more_source;                            \
 731       }                                                 \
 732     c = *src++;                                         \
 733     if (multibytep && (c & 0x80))                       \
 734       {                                                 \
 735         if ((c & 0xFE) == 0xC0)                         \
 736           c = ((c & 1) << 6) | *src++;                  \
 737         else                                            \
 738           {                                             \
 739             src--;                                      \
 740             c = - string_char (src, &src, NULL);        \
 741             record_conversion_result                    \
 742               (coding, CODING_RESULT_INVALID_SRC);      \
 743           }                                             \
 744       }                                                 \
 745     consumed_chars++;                                   \
 746   } while (0)
 747
 748 /* Safely get two bytes from the source text pointed by SRC which ends
 749    at SRC_END, and set C1 and C2 to those bytes while skipping the
 750    heading multibyte characters.  If there are not enough bytes in the
 751    source, it jumps to `no_more_source'.  If multibytep is nonzero and
 752    a multibyte character is found for C2, set C2 to -1.  In either
 753    position, a 2-byte sequence led by 0xC0 or 0xC1 is combined into
 754    ((lead & 1) << 6) | next byte.  The caller should declare and set
 755    these variables appropriately in advance: src, src_end, multibytep.
 756    It is intended that this macro is used in detect_coding_utf_16.  */
 757
 758 #define TWO_MORE_BYTES(c1, c2)                          \
 759   do {                                                  \
 760     do {                                                \
 761       if (src == src_end)                               \
 762         goto no_more_source;                            \
 763       c1 = *src++;                                      \
 764       if (multibytep && (c1 & 0x80))                    \
 765         {                                               \
 766           if ((c1 & 0xFE) == 0xC0)                      \
 767             c1 = ((c1 & 1) << 6) | *src++;              \
 768           else                                          \
 769             {                                           \
 770               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 771               c1 = -1;                                  \
 772             }                                           \
 773         }                                               \
 774     } while (c1 < 0);                                   \
 775     if (src == src_end)                                 \
 776       goto no_more_source;                              \
 777     c2 = *src++;                                        \
 778     if (multibytep && (c2 & 0x80))                      \
 779       {                                                 \
 780         if ((c2 & 0xFE) == 0xC0)                        \
 781           c2 = ((c2 & 1) << 6) | *src++;                \
 782         else                                            \
 783           c2 = -1;                                      \
 784       }                                                 \
 785   } while (0)
 786
 787
 788 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 789   do {                                                  \
 790     c = *src++;                                         \
 791     if (multibytep && (c & 0x80))                       \
 792       {                                                 \
 793         if ((c & 0xFE) == 0xC0)                         \
 794           c = ((c & 1) << 6) | *src++;                  \
 795         else                                            \
 796           {                                             \
 797             src--;                                      \
 798             c = - string_char (src, &src, NULL);        \
 799             record_conversion_result                    \
 800               (coding, CODING_RESULT_INVALID_SRC);      \
 801           }                                             \
 802       }                                                 \
 803     consumed_chars++;                                   \
 804   } while (0)
 805
 806
 807 /* Store a byte C in the place pointed by DST and increment DST to the
 808    next free point, and increment PRODUCED_CHARS.  The caller should
 809    assure that C is 0..127, and declare and set the variable `dst'
 810    appropriately in advance.
 811 */
 812
 813
 814 #define EMIT_ONE_ASCII_BYTE(c)  \
 815   do {                          \
 816     produced_chars++;           \
 817     *dst++ = (c);               \
 818   } while (0)
 819
 820
 821 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 822
 823 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 824   do {                                  \
 825     produced_chars += 2;                \
 826     *dst++ = (c1), *dst++ = (c2);       \
 827   } while (0)
 828
 829
 830 /* Store a byte C in the place pointed by DST and increment DST to the
 831    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 832    nonzero, store in an appropriate multibyte from.  The caller should
 833    declare and set the variables `dst' and `multibytep' appropriately
 834    in advance.  */
 835
 836 #define EMIT_ONE_BYTE(c)                \
 837   do {                                  \
 838     produced_chars++;                   \
 839     if (multibytep)                     \
 840       {                                 \
 841         int ch = (c);                   \
 842         if (ch >= 0x80)                 \
 843           ch = BYTE8_TO_CHAR (ch);      \
 844         CHAR_STRING_ADVANCE (ch, dst);  \
 845       }                                 \
 846     else                                \
 847       *dst++ = (c);                     \
 848   } while (0)
 849
 850
 851 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 852
 853 #define EMIT_TWO_BYTES(c1, c2)          \
 854   do {                                  \
 855     produced_chars += 2;                \
 856     if (multibytep)                     \
 857       {                                 \
 858         int ch;                         \
 859                                         \
 860         ch = (c1);                      \
 861         if (ch >= 0x80)                 \
 862           ch = BYTE8_TO_CHAR (ch);      \
 863         CHAR_STRING_ADVANCE (ch, dst);  \
 864         ch = (c2);                      \
 865         if (ch >= 0x80)                 \
 866           ch = BYTE8_TO_CHAR (ch);      \
 867         CHAR_STRING_ADVANCE (ch, dst);  \
 868       }                                 \
 869     else                                \
 870       {                                 \
 871         *dst++ = (c1);                  \
 872         *dst++ = (c2);                  \
 873       }                                 \
 874   } while (0)
 875
 876
 877 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 878   do {                                  \
 879     EMIT_ONE_BYTE (c1);                 \
 880     EMIT_TWO_BYTES (c2, c3);            \
 881   } while (0)
 882
 883
 884 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 885   do {                                          \
 886     EMIT_TWO_BYTES (c1, c2);                    \
 887     EMIT_TWO_BYTES (c3, c4);                    \
 888   } while (0)
 889
 890
 891 /* Prototypes for static functions.  */
 892 static void record_conversion_result (struct coding_system *coding,
 893                                       enum coding_result_code result);
 894 static int detect_coding_utf_8 (struct coding_system *,
 895                                 struct coding_detection_info *info);
 896 static void decode_coding_utf_8 (struct coding_system *);
 897 static int encode_coding_utf_8 (struct coding_system *);
 898
 899 static int detect_coding_utf_16 (struct coding_system *,
 900                                  struct coding_detection_info *info);
 901 static void decode_coding_utf_16 (struct coding_system *);
 902 static int encode_coding_utf_16 (struct coding_system *);
 903
 904 static int detect_coding_iso_2022 (struct coding_system *,
 905                                    struct coding_detection_info *info);
 906 static void decode_coding_iso_2022 (struct coding_system *);
 907 static int encode_coding_iso_2022 (struct coding_system *);
 908
 909 static int detect_coding_emacs_mule (struct coding_system *,
 910                                      struct coding_detection_info *info);
 911 static void decode_coding_emacs_mule (struct coding_system *);
 912 static int encode_coding_emacs_mule (struct coding_system *);
 913
 914 static int detect_coding_sjis (struct coding_system *,
 915                                struct coding_detection_info *info);
 916 static void decode_coding_sjis (struct coding_system *);
 917 static int encode_coding_sjis (struct coding_system *);
 918
 919 static int detect_coding_big5 (struct coding_system *,
 920                                struct coding_detection_info *info);
 921 static void decode_coding_big5 (struct coding_system *);
 922 static int encode_coding_big5 (struct coding_system *);
 923
 924 static int detect_coding_ccl (struct coding_system *,
 925                               struct coding_detection_info *info);
 926 static void decode_coding_ccl (struct coding_system *);
 927 static int encode_coding_ccl (struct coding_system *);
 928
 929 static void decode_coding_raw_text (struct coding_system *);
 930 static int encode_coding_raw_text (struct coding_system *);
 931
 932 static void coding_set_source (struct coding_system *);
 933 static void coding_set_destination (struct coding_system *);
 934 static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
 935 static void coding_alloc_by_making_gap (struct coding_system *,
 936                                         EMACS_INT, EMACS_INT);
 937 static unsigned char *alloc_destination (struct coding_system *,
 938                                          EMACS_INT, unsigned char *);
 939 static void setup_iso_safe_charsets (Lisp_Object);
 940 static unsigned char *encode_designation_at_bol (struct coding_system *,
 941                                                  int *, int *,
 942                                                  unsigned char *);
 943 static int detect_eol (const unsigned char *,
 944                        EMACS_INT, enum coding_category);
 945 static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
 946 static void decode_eol (struct coding_system *);
 947 static Lisp_Object get_translation_table (Lisp_Object, int, int *);
 948 static Lisp_Object get_translation (Lisp_Object, int *, int *);
 949 static int produce_chars (struct coding_system *, Lisp_Object, int);
 950 static INLINE void produce_charset (struct coding_system *, int *,
 951                                     EMACS_INT);
 952 static void produce_annotation (struct coding_system *, EMACS_INT);
 953 static int decode_coding (struct coding_system *);
 954 static INLINE int *handle_composition_annotation (EMACS_INT, EMACS_INT,
 955                                                   struct coding_system *,
 956                                                   int *, EMACS_INT *);
 957 static INLINE int *handle_charset_annotation (EMACS_INT, EMACS_INT,
 958                                               struct coding_system *,
 959                                               int *, EMACS_INT *);
 960 static void consume_chars (struct coding_system *, Lisp_Object, int);
 961 static int encode_coding (struct coding_system *);
 962 static Lisp_Object make_conversion_work_buffer (int);
 963 static Lisp_Object code_conversion_restore (Lisp_Object);
 964 static INLINE int char_encodable_p (int, Lisp_Object);
 965 static Lisp_Object make_subsidiaries (Lisp_Object);
 966
 967 static void
 968 record_conversion_result (struct coding_system *coding,
 969                           enum coding_result_code result)
 970 {
 971   coding->result = result;
 972   switch (result)
 973     {
 974     case CODING_RESULT_INSUFFICIENT_SRC:
 975       Vlast_code_conversion_error = Qinsufficient_source;
 976       break;
 977     case CODING_RESULT_INCONSISTENT_EOL:
 978       Vlast_code_conversion_error = Qinconsistent_eol;
 979       break;
 980     case CODING_RESULT_INVALID_SRC:
 981       Vlast_code_conversion_error = Qinvalid_source;
 982       break;
 983     case CODING_RESULT_INTERRUPT:
 984       Vlast_code_conversion_error = Qinterrupted;
 985       break;
 986     case CODING_RESULT_INSUFFICIENT_MEM:
 987       Vlast_code_conversion_error = Qinsufficient_memory;
 988       break;
 989     case CODING_RESULT_INSUFFICIENT_DST:
 990       /* Don't record this error in Vlast_code_conversion_error
 991          because it happens just temporarily and is resolved when the
 992          whole conversion is finished.  */
 993       break;
 994     case CODING_RESULT_SUCCESS:
 995       break;
 996     default:
 997       Vlast_code_conversion_error = intern ("Unknown error");
 998     }
 999 }
1000
/* Set C to DECODE_CHAR (CHARSET, CODE) while keeping pointers into
   the source text valid.  Decoding a character may load a charset
   map, and because that allocates large structures it can relocate
   buffer text.  When `charset_map_loaded' reports that a map was
   loaded, CODING->source is recomputed with coding_set_source, and
   SRC, SRC_BASE and SRC_END are all shifted by the distance the
   source moved.  */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    c = DECODE_CHAR (charset, code);                                         \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *old_source = coding->source;                    \
        EMACS_INT shift;                                                     \
                                                                             \
        coding_set_source (coding);                                          \
        shift = coding->source - old_source;                                 \
        src_end += shift;                                                    \
        src_base += shift;                                                   \
        src += shift;                                                        \
      }                                                                      \
  } while (0)
1021
1022
/* Make sure BYTES bytes can be stored at dst.  If dst + BYTES would
   reach or pass dst_end, enlarge coding->destination by the number of
   elements still pending in the charbuf plus BYTES, and update dst and
   dst_end to match the new destination.  We don't have to take care
   of coding->source which will be relocated.  It is handled by
   calling coding_set_source in encode_coding.

   The request size is kept in an EMACS_INT, the type that
   alloc_destination takes, so that the pointer difference is not
   narrowed to `int' on the way.

   The caller should declare and set these variables appropriately in
   advance:
        coding, dst, dst_end, charbuf, charbuf_end  */

#define ASSURE_DESTINATION(bytes)                               \
  do {                                                          \
    if (dst + (bytes) >= dst_end)                               \
      {                                                         \
        EMACS_INT more_bytes = charbuf_end - charbuf + (bytes); \
                                                                \
        dst = alloc_destination (coding, more_bytes, dst);      \
        dst_end = coding->destination + coding->dst_bytes;      \
      }                                                         \
  } while (0)
1038
1039
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   Characters up to MAX_1_BYTE_CHAR .. MAX_5_BYTE_CHAR are written as
   1 .. 5 bytes respectively; anything above MAX_5_BYTE_CHAR is handed
   to BYTE8_STRING after subtracting 0x3FFF80.

   Every use of C is parenthesized so that an expression argument such
   as `a | b' is encoded as a whole.  C is still evaluated more than
   once, so it must not have side effects.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
  do {                                          \
    if ((c) <= MAX_1_BYTE_CHAR)                 \
      *(p)++ = (c);                             \
    else if ((c) <= MAX_2_BYTE_CHAR)            \
      *(p)++ = (0xC0 | ((c) >> 6)),             \
        *(p)++ = (0x80 | ((c) & 0x3F));         \
    else if ((c) <= MAX_3_BYTE_CHAR)            \
      *(p)++ = (0xE0 | ((c) >> 12)),            \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
        *(p)++ = (0x80 | ((c) & 0x3F));         \
    else if ((c) <= MAX_4_BYTE_CHAR)            \
      *(p)++ = (0xF0 | ((c) >> 18)),            \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)), \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
        *(p)++ = (0x80 | ((c) & 0x3F));         \
    else if ((c) <= MAX_5_BYTE_CHAR)            \
      *(p)++ = 0xF8,                            \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)), \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)), \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
        *(p)++ = (0x80 | ((c) & 0x3F));         \
    else                                        \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
  } while (0)
1069
1070
/* Return the character code of the character whose multibyte form
   starts at P, and advance P past that form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length (1 to 5 bytes) is chosen from the high bits of the first
   byte.  A 2-byte form whose first byte is below 0xC2 additionally
   gets 0x3FFF80 or'ed into the result.  For the 5-byte form the first
   byte contributes nothing; the code is assembled from the four bytes
   that follow it.  P is evaluated several times, so it must be an
   lvalue without side effects.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  (((p)[0] & 0x80) == 0                                         \
   ? *(p)++                                                     \
   : ((p)[0] & 0x20) == 0                                       \
   ? ((p) += 2,                                                 \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)        \
       | (((p)[-2] & 0x1F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p)[0] & 0x10) == 0                                       \
   ? ((p) += 3,                                                 \
      ((((p)[-3] & 0x0F) << 12)                                 \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p)[0] & 0x08) == 0                                       \
   ? ((p) += 4,                                                 \
      ((((p)[-4] & 0x0F) << 18)                                 \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p) += 5,                                                 \
      ((((p)[-4] & 0x3F) << 18)                                 \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F))))
1099
1100
1101 static void
1102 coding_set_source (struct coding_system *coding)
1103 {
1104   if (BUFFERP (coding->src_object))
1105     {
1106       struct buffer *buf = XBUFFER (coding->src_object);
1107
1108       if (coding->src_pos < 0)
1109         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1110       else
1111         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1112     }
1113   else if (STRINGP (coding->src_object))
1114     {
1115       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1116     }
1117   else
1118     /* Otherwise, the source is C string and is never relocated
1119        automatically.  Thus we don't have to update anything.  */
1120     ;
1121 }
1122
1123 static void
1124 coding_set_destination (struct coding_system *coding)
1125 {
1126   if (BUFFERP (coding->dst_object))
1127     {
1128       if (coding->src_pos < 0)
1129         {
1130           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1131           coding->dst_bytes = (GAP_END_ADDR
1132                                - (coding->src_bytes - coding->consumed)
1133                                - coding->destination);
1134         }
1135       else
1136         {
1137           /* We are sure that coding->dst_pos_byte is before the gap
1138              of the buffer. */
1139           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1140                                  + coding->dst_pos_byte - BEG_BYTE);
1141           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1142                                - coding->destination);
1143         }
1144     }
1145   else
1146     /* Otherwise, the destination is C string and is never relocated
1147        automatically.  Thus we don't have to update anything.  */
1148     ;
1149 }
1150
1151
1152 static void
1153 coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
1154 {
1155   coding->destination = (unsigned char *) xrealloc (coding->destination,
1156                                                     coding->dst_bytes + bytes);
1157   coding->dst_bytes += bytes;
1158 }
1159
1160 static void
1161 coding_alloc_by_making_gap (struct coding_system *coding,
1162                             EMACS_INT gap_head_used, EMACS_INT bytes)
1163 {
1164   if (EQ (coding->src_object, coding->dst_object))
1165     {
1166       /* The gap may contain the produced data at the head and not-yet
1167          consumed data at the tail.  To preserve those data, we at
1168          first make the gap size to zero, then increase the gap
1169          size.  */
1170       EMACS_INT add = GAP_SIZE;
1171
1172       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1173       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1174       make_gap (bytes);
1175       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1176       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1177     }
1178   else
1179     {
1180       Lisp_Object this_buffer;
1181
1182       this_buffer = Fcurrent_buffer ();
1183       set_buffer_internal (XBUFFER (coding->dst_object));
1184       make_gap (bytes);
1185       set_buffer_internal (XBUFFER (this_buffer));
1186     }
1187 }
1188
1189
1190 static unsigned char *
1191 alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1192                    unsigned char *dst)
1193 {
1194   EMACS_INT offset = dst - coding->destination;
1195
1196   if (BUFFERP (coding->dst_object))
1197     {
1198       struct buffer *buf = XBUFFER (coding->dst_object);
1199
1200       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1201     }
1202   else
1203     coding_alloc_by_realloc (coding, nbytes);
1204   coding_set_destination (coding);
1205   dst = coding->destination + offset;
1206   return dst;
1207 }
1208
1209 /** Macros for annotations.  */
1210
1211 /* An annotation data is stored in the array coding->charbuf in this
1212    format:
1213      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1214    LENGTH is the number of elements in the annotation.
1215    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1216    NCHARS is the number of characters in the text annotated.
1217
1218    The format of the following elements depend on ANNOTATION_MASK.
1219
   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
1222      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1223
1224    NBYTES is the number of bytes specified in the header part of
1225    old-style emacs-mule encoding, or 0 for the other kind of
1226    composition.
1227
1228    METHOD is one of enum composition_method.
1229
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.
1232
1233    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1234    follows.
1235
1236    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1237    recover from an invalid annotation, and should be skipped by
1238    produce_annotation.  */
1239
1240 /* Maximum length of the header of annotation data.  */
1241 #define MAX_ANNOTATION_LENGTH 5
1242
/* Store the three-element annotation header [ -LEN MASK NCHARS ] at
   BUF, advance BUF past it, and set the `annotated' flag of the
   variable `coding', which the caller must have in scope.

   The definition deliberately does not end with a semicolon: the
   caller supplies it, which keeps the macro usable as the body of an
   unbraced `if' that is followed by `else'.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1250
/* Store a composition annotation at BUF and advance BUF past it: the
   header written by ADD_ANNOTATION_DATA with length 5 and mask
   CODING_ANNOTATE_COMPOSITION_MASK, followed by NBYTES and METHOD.
   The arguments are parenthesized so that an expression such as `*pp'
   passed for BUF advances the same pointer that ADD_ANNOTATION_DATA
   advances.  */

#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes);                                                    \
    *(buf)++ = (method);                                                    \
  } while (0)
1257
1258
/* Store a charset annotation at BUF and advance BUF past it: the
   header written by ADD_ANNOTATION_DATA with length 4 and mask
   CODING_ANNOTATE_CHARSET_MASK, followed by the charset ID.  The
   arguments are parenthesized so that an expression such as `*pp'
   passed for BUF advances the same pointer that ADD_ANNOTATION_DATA
   advances.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1264
1265 \f
1266 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1267
1268
1269
1270 \f
1271 /*** 3. UTF-8 ***/
1272
1273 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1274    Check if a text is encoded in UTF-8.  If it is, return 1, else
1275    return 0.  */
1276
/* Predicates classifying one octet C of a UTF-8 sequence by its
   high-order bits.  UTF_8_1_OCTET_P is a plain numeric comparison, so
   it is also true for a negative C; the others test a bit pattern:
     UTF_8_EXTRA_OCTET_P      10xxxxxx  (trailing octet)
     UTF_8_2_OCTET_LEADING_P  110xxxxx
     UTF_8_3_OCTET_LEADING_P  1110xxxx
     UTF_8_4_OCTET_LEADING_P  11110xxx
     UTF_8_5_OCTET_LEADING_P  111110xx  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)

/* The byte order mark character, and the three octets that encode it
   in UTF-8.  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1288
/* Check whether the source text of CODING looks like UTF-8 and record
   the verdict in DETECT_INFO.

   Return 0, with CATEGORY_MASK_UTF_8 added to DETECT_INFO->rejected,
   if an octet sequence is found that is not well formed, or if the
   source ends inside a sequence while CODING_MODE_LAST_BLOCK is set.
   Otherwise return 1.  In that case, when the text starts with the
   three BOM octets both the SIG and NOSIG categories are marked as
   found; when it does not, the SIG category is rejected and the NOSIG
   category is marked as found only if at least one multi-octet
   sequence was seen.

   Only the octet structure is checked here; overlong forms and
   surrogates are not rejected by this function.  */
static int
detect_coding_utf_8 (struct coding_system *coding,
                     struct coding_detection_info *detect_info)
{
  const unsigned char *src = coding->source, *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  int bom_found = 0;
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_UTF_8;
  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  while (1)
    {
      int c, c1, c2, c3, c4;

      /* Remember where this sequence starts; ONE_MORE_BYTE jumps to
         no_more_source when the source is exhausted.  */
      src_base = src;
      ONE_MORE_BYTE (c);
      if (c < 0 || UTF_8_1_OCTET_P (c))
        continue;
      ONE_MORE_BYTE (c1);
      if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
        break;
      if (UTF_8_2_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c2);
      if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
        break;
      if (UTF_8_3_OCTET_LEADING_P (c))
        {
          found = 1;
          /* A BOM only counts at the very beginning of the source.  */
          if (src_base == coding->source
              && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
            bom_found = 1;
          continue;
        }
      ONE_MORE_BYTE (c3);
      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
        break;
      if (UTF_8_4_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c4);
      if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
        break;
      if (UTF_8_5_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      break;
    }
  /* The loop is left by `break' only on a malformed sequence.  */
  detect_info->rejected |= CATEGORY_MASK_UTF_8;
  return 0;

 no_more_source:
  /* src_base < src means the source ended after part of a sequence
     had been read; in the last block that makes the text invalid.  */
  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8;
      return 0;
    }
  if (bom_found)
    {
      /* The first character 0xFEFF doesn't necessarily mean a BOM.  */
      detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
    }
  else
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
      if (found)
        detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
    }
  return 1;
}
1371
1372
1373 static void
1374 decode_coding_utf_8 (struct coding_system *coding)
1375 {
1376   const unsigned char *src = coding->source + coding->consumed;
1377   const unsigned char *src_end = coding->source + coding->src_bytes;
1378   const unsigned char *src_base;
1379   int *charbuf = coding->charbuf + coding->charbuf_used;
1380   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1381   int consumed_chars = 0, consumed_chars_base = 0;
1382   int multibytep = coding->src_multibyte;
1383   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1384   Lisp_Object attr, charset_list;
1385   int eol_crlf =
1386     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1387   int byte_after_cr = -1;
1388
1389   CODING_GET_INFO (coding, attr, charset_list);
1390
1391   if (bom != utf_without_bom)
1392     {
1393       int c1, c2, c3;
1394
1395       src_base = src;
1396       ONE_MORE_BYTE (c1);
1397       if (! UTF_8_3_OCTET_LEADING_P (c1))
1398         src = src_base;
1399       else
1400         {
1401           ONE_MORE_BYTE (c2);
1402           if (! UTF_8_EXTRA_OCTET_P (c2))
1403             src = src_base;
1404           else
1405             {
1406               ONE_MORE_BYTE (c3);
1407               if (! UTF_8_EXTRA_OCTET_P (c3))
1408                 src = src_base;
1409               else
1410                 {
1411                   if ((c1 != UTF_8_BOM_1)
1412                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1413                     src = src_base;
1414                   else
1415                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1416                 }
1417             }
1418         }
1419     }
1420   CODING_UTF_8_BOM (coding) = utf_without_bom;
1421
1422   while (1)
1423     {
1424       int c, c1, c2, c3, c4, c5;
1425
1426       src_base = src;
1427       consumed_chars_base = consumed_chars;
1428
1429       if (charbuf >= charbuf_end)
1430         {
1431           if (byte_after_cr >= 0)
1432             src_base--;
1433           break;
1434         }
1435
1436       if (byte_after_cr >= 0)
1437         c1 = byte_after_cr, byte_after_cr = -1;
1438       else
1439         ONE_MORE_BYTE (c1);
1440       if (c1 < 0)
1441         {
1442           c = - c1;
1443         }
1444       else if (UTF_8_1_OCTET_P (c1))
1445         {
1446           if (eol_crlf && c1 == '\r')
1447             ONE_MORE_BYTE (byte_after_cr);
1448           c = c1;
1449         }
1450       else
1451         {
1452           ONE_MORE_BYTE (c2);
1453           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1454             goto invalid_code;
1455           if (UTF_8_2_OCTET_LEADING_P (c1))
1456             {
1457               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1458               /* Reject overlong sequences here and below.  Encoders
1459                  producing them are incorrect, they can be misleading,
1460                  and they mess up read/write invariance.  */
1461               if (c < 128)
1462                 goto invalid_code;
1463             }
1464           else
1465             {
1466               ONE_MORE_BYTE (c3);
1467               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1468                 goto invalid_code;
1469               if (UTF_8_3_OCTET_LEADING_P (c1))
1470                 {
1471                   c = (((c1 & 0xF) << 12)
1472                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1473                   if (c < 0x800
1474                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1475                     goto invalid_code;
1476                 }
1477               else
1478                 {
1479                   ONE_MORE_BYTE (c4);
1480                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1481                     goto invalid_code;
1482                   if (UTF_8_4_OCTET_LEADING_P (c1))
1483                     {
1484                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1485                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1486                     if (c < 0x10000)
1487                       goto invalid_code;
1488                     }
1489                   else
1490                     {
1491                       ONE_MORE_BYTE (c5);
1492                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1493                         goto invalid_code;
1494                       if (UTF_8_5_OCTET_LEADING_P (c1))
1495                         {
1496                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1497                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1498                                | (c5 & 0x3F));
1499                           if ((c > MAX_CHAR) || (c < 0x200000))
1500                             goto invalid_code;
1501                         }
1502                       else
1503                         goto invalid_code;
1504                     }
1505                 }
1506             }
1507         }
1508
1509       *charbuf++ = c;
1510       continue;
1511
1512     invalid_code:
1513       src = src_base;
1514       consumed_chars = consumed_chars_base;
1515       ONE_MORE_BYTE (c);
1516       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1517       coding->errors++;
1518     }
1519
1520  no_more_source:
1521   coding->consumed_char += consumed_chars_base;
1522   coding->consumed = src_base - coding->source;
1523   coding->charbuf_used = charbuf - coding->charbuf;
1524 }
1525
1526
1527 static int
1528 encode_coding_utf_8 (struct coding_system *coding)
1529 {
1530   int multibytep = coding->dst_multibyte;
1531   int *charbuf = coding->charbuf;
1532   int *charbuf_end = charbuf + coding->charbuf_used;
1533   unsigned char *dst = coding->destination + coding->produced;
1534   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1535   int produced_chars = 0;
1536   int c;
1537
1538   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1539     {
1540       ASSURE_DESTINATION (3);
1541       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1542       CODING_UTF_8_BOM (coding) = utf_without_bom;
1543     }
1544
1545   if (multibytep)
1546     {
1547       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1548
1549       while (charbuf < charbuf_end)
1550         {
1551           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1552
1553           ASSURE_DESTINATION (safe_room);
1554           c = *charbuf++;
1555           if (CHAR_BYTE8_P (c))
1556             {
1557               c = CHAR_TO_BYTE8 (c);
1558               EMIT_ONE_BYTE (c);
1559             }
1560           else
1561             {
1562               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1563               for (p = str; p < pend; p++)
1564                 EMIT_ONE_BYTE (*p);
1565             }
1566         }
1567     }
1568   else
1569     {
1570       int safe_room = MAX_MULTIBYTE_LENGTH;
1571
1572       while (charbuf < charbuf_end)
1573         {
1574           ASSURE_DESTINATION (safe_room);
1575           c = *charbuf++;
1576           if (CHAR_BYTE8_P (c))
1577             *dst++ = CHAR_TO_BYTE8 (c);
1578           else
1579             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1580           produced_chars++;
1581         }
1582     }
1583   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1584   coding->produced_char += produced_chars;
1585   coding->produced = dst - coding->destination;
1586   return 0;
1587 }
1588
1589
1590 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1591    Check if a text is encoded in one of UTF-16 based coding systems.
1592    If it is, return 1, else return 0.  */
1593
/* Nonzero if bits 10..15 of VAL equal those of 0xD800; for a 16-bit
   VAL that is the range 0xD800..0xDBFF (high surrogates).  */
#define UTF_16_HIGH_SURROGATE_P(val)    (0xD800 == ((val) & 0xFC00))

/* Nonzero if bits 10..15 of VAL equal those of 0xDC00; for a 16-bit
   VAL that is the range 0xDC00..0xDFFF (low surrogates).  */
#define UTF_16_LOW_SURROGATE_P(val)     (0xDC00 == ((val) & 0xFC00))

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  */
#define UTF_16_INVALID_P(val)           \
  ((val) == 0xFFFE                      \
   || (val) == 0xFFFF                   \
   || UTF_16_LOW_SURROGATE_P (val))
1604
1605
1606 static int
1607 detect_coding_utf_16 (struct coding_system *coding,
1608                       struct coding_detection_info *detect_info)
1609 {
1610   const unsigned char *src = coding->source, *src_base = src;
1611   const unsigned char *src_end = coding->source + coding->src_bytes;
1612   int multibytep = coding->src_multibyte;
1613   int consumed_chars = 0;
1614   int c1, c2;
1615
1616   detect_info->checked |= CATEGORY_MASK_UTF_16;
1617   if (coding->mode & CODING_MODE_LAST_BLOCK
1618       && (coding->src_chars & 1))
1619     {
1620       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1621       return 0;
1622     }
1623
1624   TWO_MORE_BYTES (c1, c2);
1625   if ((c1 == 0xFF) && (c2 == 0xFE))
1626     {
1627       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1628                              | CATEGORY_MASK_UTF_16_AUTO);
1629       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1630                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1631                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1632     }
1633   else if ((c1 == 0xFE) && (c2 == 0xFF))
1634     {
1635       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1636                              | CATEGORY_MASK_UTF_16_AUTO);
1637       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1638                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1639                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1640     }
1641   else if (c2 < 0)
1642     {
1643       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1644       return 0;
1645     }
1646   else
1647     {
1648       /* We check the dispersion of Eth and Oth bytes where E is even and
1649          O is odd.  If both are high, we assume binary data.*/
1650       unsigned char e[256], o[256];
1651       unsigned e_num = 1, o_num = 1;
1652
1653       memset (e, 0, 256);
1654       memset (o, 0, 256);
1655       e[c1] = 1;
1656       o[c2] = 1;
1657
1658       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1659                                 |CATEGORY_MASK_UTF_16_BE
1660                                 | CATEGORY_MASK_UTF_16_LE);
1661
1662       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1663              != CATEGORY_MASK_UTF_16)
1664         {
1665           TWO_MORE_BYTES (c1, c2);
1666           if (c2 < 0)
1667             break;
1668           if (! e[c1])
1669             {
1670               e[c1] = 1;
1671               e_num++;
1672               if (e_num >= 128)
1673                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1674             }
1675           if (! o[c2])
1676             {
1677               o[c2] = 1;
1678               o_num++;
1679               if (o_num >= 128)
1680                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1681             }
1682         }
1683       return 0;
1684     }
1685
1686  no_more_source:
1687   return 1;
1688 }
1689
1690 static void
1691 decode_coding_utf_16 (struct coding_system *coding)
1692 {
1693   const unsigned char *src = coding->source + coding->consumed;
1694   const unsigned char *src_end = coding->source + coding->src_bytes;
1695   const unsigned char *src_base;
1696   int *charbuf = coding->charbuf + coding->charbuf_used;
1697   /* We may produces at most 3 chars in one loop.  */
1698   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1699   int consumed_chars = 0, consumed_chars_base = 0;
1700   int multibytep = coding->src_multibyte;
1701   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1702   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1703   int surrogate = CODING_UTF_16_SURROGATE (coding);
1704   Lisp_Object attr, charset_list;
1705   int eol_crlf =
1706     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1707   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1708
1709   CODING_GET_INFO (coding, attr, charset_list);
1710
1711   if (bom == utf_with_bom)
1712     {
1713       int c, c1, c2;
1714
1715       src_base = src;
1716       ONE_MORE_BYTE (c1);
1717       ONE_MORE_BYTE (c2);
1718       c = (c1 << 8) | c2;
1719
1720       if (endian == utf_16_big_endian
1721           ? c != 0xFEFF : c != 0xFFFE)
1722         {
1723           /* The first two bytes are not BOM.  Treat them as bytes
1724              for a normal character.  */
1725           src = src_base;
1726           coding->errors++;
1727         }
1728       CODING_UTF_16_BOM (coding) = utf_without_bom;
1729     }
1730   else if (bom == utf_detect_bom)
1731     {
1732       /* We have already tried to detect BOM and failed in
1733          detect_coding.  */
1734       CODING_UTF_16_BOM (coding) = utf_without_bom;
1735     }
1736
1737   while (1)
1738     {
1739       int c, c1, c2;
1740
1741       src_base = src;
1742       consumed_chars_base = consumed_chars;
1743
1744       if (charbuf >= charbuf_end)
1745         {
1746           if (byte_after_cr1 >= 0)
1747             src_base -= 2;
1748           break;
1749         }
1750
1751       if (byte_after_cr1 >= 0)
1752         c1 = byte_after_cr1, byte_after_cr1 = -1;
1753       else
1754         ONE_MORE_BYTE (c1);
1755       if (c1 < 0)
1756         {
1757           *charbuf++ = -c1;
1758           continue;
1759         }
1760       if (byte_after_cr2 >= 0)
1761         c2 = byte_after_cr2, byte_after_cr2 = -1;
1762       else
1763         ONE_MORE_BYTE (c2);
1764       if (c2 < 0)
1765         {
1766           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1767           *charbuf++ = -c2;
1768           continue;
1769         }
1770       c = (endian == utf_16_big_endian
1771            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1772
1773       if (surrogate)
1774         {
1775           if (! UTF_16_LOW_SURROGATE_P (c))
1776             {
1777               if (endian == utf_16_big_endian)
1778                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1779               else
1780                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1781               *charbuf++ = c1;
1782               *charbuf++ = c2;
1783               coding->errors++;
1784               if (UTF_16_HIGH_SURROGATE_P (c))
1785                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1786               else
1787                 *charbuf++ = c;
1788             }
1789           else
1790             {
1791               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1792               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1793               *charbuf++ = 0x10000 + c;
1794             }
1795         }
1796       else
1797         {
1798           if (UTF_16_HIGH_SURROGATE_P (c))
1799             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1800           else
1801             {
1802               if (eol_crlf && c == '\r')
1803                 {
1804                   ONE_MORE_BYTE (byte_after_cr1);
1805                   ONE_MORE_BYTE (byte_after_cr2);
1806                 }
1807               *charbuf++ = c;
1808             }
1809         }
1810     }
1811
1812  no_more_source:
1813   coding->consumed_char += consumed_chars_base;
1814   coding->consumed = src_base - coding->source;
1815   coding->charbuf_used = charbuf - coding->charbuf;
1816 }
1817
1818 static int
1819 encode_coding_utf_16 (struct coding_system *coding)
1820 {
1821   int multibytep = coding->dst_multibyte;
1822   int *charbuf = coding->charbuf;
1823   int *charbuf_end = charbuf + coding->charbuf_used;
1824   unsigned char *dst = coding->destination + coding->produced;
1825   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1826   int safe_room = 8;
1827   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1828   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1829   int produced_chars = 0;
1830   Lisp_Object attrs, charset_list;
1831   int c;
1832
1833   CODING_GET_INFO (coding, attrs, charset_list);
1834
1835   if (bom != utf_without_bom)
1836     {
1837       ASSURE_DESTINATION (safe_room);
1838       if (big_endian)
1839         EMIT_TWO_BYTES (0xFE, 0xFF);
1840       else
1841         EMIT_TWO_BYTES (0xFF, 0xFE);
1842       CODING_UTF_16_BOM (coding) = utf_without_bom;
1843     }
1844
1845   while (charbuf < charbuf_end)
1846     {
1847       ASSURE_DESTINATION (safe_room);
1848       c = *charbuf++;
1849       if (c > MAX_UNICODE_CHAR)
1850         c = coding->default_char;
1851
1852       if (c < 0x10000)
1853         {
1854           if (big_endian)
1855             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1856           else
1857             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1858         }
1859       else
1860         {
1861           int c1, c2;
1862
1863           c -= 0x10000;
1864           c1 = (c >> 10) + 0xD800;
1865           c2 = (c & 0x3FF) + 0xDC00;
1866           if (big_endian)
1867             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1868           else
1869             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1870         }
1871     }
1872   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1873   coding->produced = dst - coding->destination;
1874   coding->produced_char += produced_chars;
1875   return 0;
1876 }
1877
1878 \f
1879 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1880
1881 /* Emacs' internal format for representation of multiple character
1882    sets is a kind of multi-byte encoding, i.e. characters are
1883    represented by variable-length sequences of one-byte codes.
1884
1885    ASCII characters and control characters (e.g. `tab', `newline') are
1886    represented by one-byte sequences which are their ASCII codes, in
1887    the range 0x00 through 0x7F.
1888
1889    8-bit characters of the range 0x80..0x9F are represented by
1890    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1891    code + 0x20).
1892
1893    8-bit characters of the range 0xA0..0xFF are represented by
1894    one-byte sequences which are their 8-bit code.
1895
1896    The other characters are represented by a sequence of `base
1897    leading-code', optional `extended leading-code', and one or two
1898    `position-code's.  The length of the sequence is determined by the
1899    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1900    whereas extended leading-code and position-code take the range 0xA0
1901    through 0xFF.  See `charset.h' for more details about leading-code
1902    and position-code.
1903
1904    --- CODE RANGE of Emacs' internal format ---
1905    character set        range
1906    -------------        -----
1907    ascii                0x00..0x7F
1908    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1909    eight-bit-graphic    0xA0..0xBF
1910    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1911    ---------------------------------------------
1912
1913    As this is the internal character representation, the format is
1914    usually not used externally (i.e. in a file or in a data sent to a
1915    process).  But, it is possible to have a text externally in this
1916    format (i.e. by encoding by the coding system `emacs-mule').
1917
1918    In that case, a sequence of one-byte codes has a slightly different
1919    form.
1920
1921    At first, all characters in eight-bit-control are represented by
1922    one-byte sequences which are their 8-bit code.
1923
1924    Next, character composition data are represented by the byte
1925    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1926    where,
1927         METHOD is 0xF2 plus one of composition method (enum
1928         composition_method),
1929
1930         BYTES is 0xA0 plus a byte length of this composition data,
1931
1932         CHARS is 0xA0 plus a number of characters composed by this
1933         data,
1934
        COMPONENTs are characters in multibyte form or composition
        rules encoded by two bytes of ASCII codes.
1937
1938    In addition, for backward compatibility, the following formats are
1939    also recognized as composition data on decoding.
1940
1941    0x80 MSEQ ...
1942    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1943
1944    Here,
        MSEQ is a multibyte form, but in one of these special formats:
1946           ASCII: 0xA0 ASCII_CODE+0x80,
1947           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1948         RULE is a one byte code of the range 0xA0..0xF0 that
1949         represents a composition rule.
1950   */
1951
/* Table of emacs-mule sequence lengths, indexed by a byte value.
   The functions below look up the leading byte of a sequence here:
   detect_coding_emacs_mule takes the entry minus one as the number
   of bytes that must follow, and emacs_mule_char handles the entries
   1 through 4 (any other value makes it abort).  */
char emacs_mule_bytes[256];
1953
1954
1955 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1956    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1957    else return 0.  */
1958
1959 static int
1960 detect_coding_emacs_mule (struct coding_system *coding,
1961                           struct coding_detection_info *detect_info)
1962 {
1963   const unsigned char *src = coding->source, *src_base;
1964   const unsigned char *src_end = coding->source + coding->src_bytes;
1965   int multibytep = coding->src_multibyte;
1966   int consumed_chars = 0;
1967   int c;
1968   int found = 0;
1969
1970   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1971   /* A coding system of this category is always ASCII compatible.  */
1972   src += coding->head_ascii;
1973
1974   while (1)
1975     {
1976       src_base = src;
1977       ONE_MORE_BYTE (c);
1978       if (c < 0)
1979         continue;
1980       if (c == 0x80)
1981         {
1982           /* Perhaps the start of composite character.  We simply skip
1983              it because analyzing it is too heavy for detecting.  But,
1984              at least, we check that the composite character
1985              constitutes of more than 4 bytes.  */
1986           const unsigned char *src_base;
1987
1988         repeat:
1989           src_base = src;
1990           do
1991             {
1992               ONE_MORE_BYTE (c);
1993             }
1994           while (c >= 0xA0);
1995
1996           if (src - src_base <= 4)
1997             break;
1998           found = CATEGORY_MASK_EMACS_MULE;
1999           if (c == 0x80)
2000             goto repeat;
2001         }
2002
2003       if (c < 0x80)
2004         {
2005           if (c < 0x20
2006               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2007             break;
2008         }
2009       else
2010         {
2011           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2012
2013           while (more_bytes > 0)
2014             {
2015               ONE_MORE_BYTE (c);
2016               if (c < 0xA0)
2017                 {
2018                   src--;        /* Unread the last byte.  */
2019                   break;
2020                 }
2021               more_bytes--;
2022             }
2023           if (more_bytes != 0)
2024             break;
2025           found = CATEGORY_MASK_EMACS_MULE;
2026         }
2027     }
2028   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2029   return 0;
2030
2031  no_more_source:
2032   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2033     {
2034       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2035       return 0;
2036     }
2037   detect_info->found |= found;
2038   return 1;
2039 }
2040
2041
2042 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2043    character.  If CMP_STATUS indicates that we must expect MSEQ or
2044    RULE described above, decode it and return the negative value of
2045    the decoded character or rule.  If an invalid byte is found, return
2046    -1.  If SRC is too short, return -2.  */
2047
2048 int
2049 emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2050                  int *nbytes, int *nchars, int *id,
2051                  struct composition_status *cmp_status)
2052 {
2053   const unsigned char *src_end = coding->source + coding->src_bytes;
2054   const unsigned char *src_base = src;
2055   int multibytep = coding->src_multibyte;
2056   struct charset *charset;
2057   unsigned code;
2058   int c;
2059   int consumed_chars = 0;
2060   int mseq_found = 0;
2061
2062   ONE_MORE_BYTE (c);
2063   if (c < 0)
2064     {
2065       c = -c;
2066       charset = emacs_mule_charset[0];
2067     }
2068   else
2069     {
2070       if (c >= 0xA0)
2071         {
2072           if (cmp_status->state != COMPOSING_NO
2073               && cmp_status->old_form)
2074             {
2075               if (cmp_status->state == COMPOSING_CHAR)
2076                 {
2077                   if (c == 0xA0)
2078                     {
2079                       ONE_MORE_BYTE (c);
2080                       c -= 0x80;
2081                       if (c < 0)
2082                         goto invalid_code;
2083                     }
2084                   else
2085                     c -= 0x20;
2086                   mseq_found = 1;
2087                 }
2088               else
2089                 {
2090                   *nbytes = src - src_base;
2091                   *nchars = consumed_chars;
2092                   return -c;
2093                 }
2094             }
2095           else
2096             goto invalid_code;
2097         }
2098
2099       switch (emacs_mule_bytes[c])
2100         {
2101         case 2:
2102           if (! (charset = emacs_mule_charset[c]))
2103             goto invalid_code;
2104           ONE_MORE_BYTE (c);
2105           if (c < 0xA0)
2106             goto invalid_code;
2107           code = c & 0x7F;
2108           break;
2109
2110         case 3:
2111           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2112               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2113             {
2114               ONE_MORE_BYTE (c);
2115               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
2116                 goto invalid_code;
2117               ONE_MORE_BYTE (c);
2118               if (c < 0xA0)
2119                 goto invalid_code;
2120               code = c & 0x7F;
2121             }
2122           else
2123             {
2124               if (! (charset = emacs_mule_charset[c]))
2125                 goto invalid_code;
2126               ONE_MORE_BYTE (c);
2127               if (c < 0xA0)
2128                 goto invalid_code;
2129               code = (c & 0x7F) << 8;
2130               ONE_MORE_BYTE (c);
2131               if (c < 0xA0)
2132                 goto invalid_code;
2133               code |= c & 0x7F;
2134             }
2135           break;
2136
2137         case 4:
2138           ONE_MORE_BYTE (c);
2139           if (c < 0 || ! (charset = emacs_mule_charset[c]))
2140             goto invalid_code;
2141           ONE_MORE_BYTE (c);
2142           if (c < 0xA0)
2143             goto invalid_code;
2144           code = (c & 0x7F) << 8;
2145           ONE_MORE_BYTE (c);
2146           if (c < 0xA0)
2147             goto invalid_code;
2148           code |= c & 0x7F;
2149           break;
2150
2151         case 1:
2152           code = c;
2153           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2154                                      ? charset_ascii : charset_eight_bit);
2155           break;
2156
2157         default:
2158           abort ();
2159         }
2160       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, code, c);
2161       if (c < 0)
2162         goto invalid_code;
2163     }
2164   *nbytes = src - src_base;
2165   *nchars = consumed_chars;
2166   if (id)
2167     *id = charset->id;
2168   return (mseq_found ? -c : c);
2169
2170  no_more_source:
2171   return -2;
2172
2173  invalid_code:
2174   return -1;
2175 }
2176
2177
2178 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2179
/* Handle these composition sequences ('|': the end of header elements,
   BYTES and CHARS >= 0xA0):
2182
2183    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2184    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2185    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2186
2187    and these old form:
2188
2189    (4) relative composition: 0x80 | MSEQ ... MSEQ
2190    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2191
2192    When the starter 0x80 and the following header elements are found,
2193    this annotation header is produced.
2194
2195         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2196
2197    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2198    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2199
2200    Then, upon reading the following elements, these codes are produced
2201    until the composition end is found:
2202
2203    (1) CHAR ... CHAR
2204    (2) ALT ... ALT CHAR ... CHAR
2205    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2206    (4) CHAR ... CHAR
2207    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2208
2209    When the composition end is found, LENGTH and NCHARS in the
2210    annotation header is updated as below:
2211
2212    (1) LENGTH: unchanged, NCHARS: unchanged
2213    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2214    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2215    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2216    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2217
2218    If an error is found while composing, the annotation header is
2219    changed to the original composition header (plus filler -1s) as
2220    below:
2221
2222    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
   (5)          [ 0x80 0xFF -1 -1 -1 ]
2224
2225    and the sequence [ -2 DECODED-RULE ] is changed to the original
2226    byte sequence as below:
2227         o the original byte sequence is B: [ B -1 ]
2228         o the original byte sequence is B1 B2: [ B1 B2 ]
2229
2230    Most of the routines are implemented by macros because many
2231    variables and labels in the caller decode_coding_emacs_mule must be
2232    accessible, and they are usually called just once (thus doesn't
2233    increase the size of compiled object).  */
2234
/* Decode the Emacs 20 style composition rule held in C and store the
   result in RULE.  C is first reduced by 0xA0 (and left modified); a
   result outside 0..80 jumps to invalid_code.  The quotient and the
   remainder of the division by 9 are the two reference points, the
   value 4 being replaced by 10 in either of them.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    c -= 0xA0;                                          \
    if (! (c >= 0 && c < 81))                           \
      goto invalid_code;                                \
    gref = c / 9;                                       \
    nref = c % 9;                                       \
    gref = (gref == 4) ? 10 : gref;                     \
    nref = (nref == 4) ? 10 : nref;                     \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2251
2252
/* Decode the Emacs 21 style composition rule made of C and the next
   byte, which is read into C with ONE_MORE_BYTE, and store the result
   in RULE.  Each of the two bytes, reduced by 0x20, must lie in
   0..80; otherwise control jumps to invalid_code.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
  do {                                                  \
    int gref = c - 0x20;                                \
    int nref;                                           \
                                                        \
    if (! (gref >= 0 && gref < 81))                     \
      goto invalid_code;                                \
    ONE_MORE_BYTE (c);                                  \
    nref = c - 0x20;                                    \
    if (! (nref >= 0 && nref < 81))                     \
      goto invalid_code;                                \
    rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
  } while (0)
2270
2271
/* Start of Emacs 21 style format.  On entry C holds the method byte
   (composition method + 0xF2).  The next two bytes are read from the
   source: BYTES (0xA0 + the byte length of this composition
   information) and CHARS (0xA0 + the number of characters composed).
   Jump to invalid_code if they are out of range; otherwise set up
   CMP_STATUS and add the annotation header to CHARBUF.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    if (method == COMPOSITION_RELATIVE && nbytes != 4)                  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
      goto invalid_code;                                                \
    cmp_status->old_form = 0;                                           \
    cmp_status->method = method;                                        \
    cmp_status->state = (method == COMPOSITION_RELATIVE                 \
                         ? COMPOSING_CHAR                               \
                         : COMPOSING_COMPONENT_CHAR);                   \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
2304
2305
/* Start of Emacs 20 style format for relative composition: mark
   CMP_STATUS as composing characters in the old form, with no
   characters or components counted yet, and add the annotation
   header to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  do {                                                          \
    cmp_status->method = COMPOSITION_RELATIVE;                  \
    cmp_status->old_form = 1;                                   \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->nchars = 0;                                     \
    cmp_status->ncomps = 0;                                     \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2317
2318
/* Start of Emacs 20 style format for rule-base composition: mark
   CMP_STATUS as composing characters in the old form with the
   COMPOSITION_WITH_RULE method, with no characters or components
   counted yet, and add the annotation header to CHARBUF.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  do {                                                          \
    cmp_status->method = COMPOSITION_WITH_RULE;                 \
    cmp_status->old_form = 1;                                   \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->nchars = 0;                                     \
    cmp_status->ncomps = 0;                                     \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)
2330
2331
/* Read the byte that follows a composition starter into C and begin
   the kind of composition it announces: a method byte starts the
   Emacs 21 format, a byte in 0xA0..0xBF starts an Emacs 20 relative
   composition (the byte is then read again as the first component),
   and 0xFF starts an Emacs 20 rule-base composition.  Anything else
   jumps to invalid_code.  */

#define DECODE_EMACS_MULE_COMPOSITION_START()           \
  do {                                                  \
    const unsigned char *current_src = src;             \
                                                        \
    ONE_MORE_BYTE (c);                                  \
    if (c < 0)                                          \
      goto invalid_code;                                \
    if (c - 0xF2 >= COMPOSITION_RELATIVE                \
        && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
      {                                                 \
        DECODE_EMACS_MULE_21_COMPOSITION ();            \
      }                                                 \
    else if (c >= 0xA0 && c < 0xC0)                     \
      {                                                 \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
        /* Re-read C as a composition component.  */    \
        src = current_src;                              \
      }                                                 \
    else if (c == 0xFF)                                 \
      {                                                 \
        DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();   \
      }                                                 \
    else                                                \
      goto invalid_code;                                \
  } while (0)
2355
     /* Close the composition that is being decoded and return to the
        COMPOSING_NO state.  Needs `charbuf' and `cmp_status' in scope.

        IDX addresses the element cmp_status->length slots before CHARBUF,
        i.e. the first element stored for this composition.  For an
        old-form composition the number of characters counted so far is
        stored at IDX + 2.  For a new-form composition whose method is
        above COMPOSITION_RELATIVE the element at IDX is recomputed from
        the one at IDX + 2 and the length.

        NOTE(review): IDX and IDX + 2 are presumably the length and
        character-count fields of the composition annotation -- confirm
        against ADD_COMPOSITION_DATA.  */
2356 #define EMACS_MULE_COMPOSITION_END()                            \
2357   do {                                                          \
2358     int idx = - cmp_status->length;                             \
2359                                                                 \
2360     if (cmp_status->old_form)                                   \
2361       charbuf[idx + 2] = cmp_status->nchars;                    \
2362     else if (cmp_status->method > COMPOSITION_RELATIVE)         \
2363       charbuf[idx] = charbuf[idx + 2] - cmp_status->length;     \
2364     cmp_status->state = COMPOSING_NO;                           \
2365   } while (0)
2366
2367
     /* Finish the composition described by CMP_STATUS at a point where
        decoding cannot continue it.  CHARBUF points just past the last
        element stored; the composition's data starts cmp_status->length
        elements before it.  Set the state to COMPOSING_NO and return the
        number of characters the caller has to add to its character
        count.

        An old-form composition that already has at least one component
        is kept, and its component count is stored in the data.  In every
        other case the data is overwritten with the bytes that introduced
        the composition, stored as eight-bit characters and followed by
        negative filler values.

        NOTE(review): the negative values are presumably annotation
        headers that later processing skips -- confirm.  */
2368 static int
2369 emacs_mule_finish_composition (int *charbuf,
2370                                struct composition_status *cmp_status)
2371 {
2372   int idx = - cmp_status->length;
2373   int new_chars;
2374
2375   if (cmp_status->old_form && cmp_status->nchars > 0)
2376     {
2377       charbuf[idx + 2] = cmp_status->nchars;
2378       new_chars = 0;
           /* In a rule-base composition the state goes back to
              COMPOSING_CHAR right after a rule has been stored, so being
              in that state here means the last rule has no character
              after it.  */
2379       if (cmp_status->method == COMPOSITION_WITH_RULE
2380           && cmp_status->state == COMPOSING_CHAR)
2381         {
2382           /* The last rule was invalid.  */
               /* Replace the trailing marker and rule pair by one
                  eight-bit character plus a filler.  NOTE(review): adding
                  0xA0 presumably recovers the source byte of the rule --
                  confirm against DECODE_EMACS_MULE_COMPOSITION_RULE_20.  */
2383           int rule = charbuf[-1] + 0xA0;
2384
2385           charbuf[-2] = BYTE8_TO_CHAR (rule);
2386           charbuf[-1] = -1;
2387           new_chars = 1;
2388         }
2389     }
2390   else
2391     {
2392       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2393
2394       if (cmp_status->method == COMPOSITION_WITH_RULE)
2395         {
               /* NOTE(review): two eight-bit characters (0x80 above and
                  0xFF here) are stored but 1 is returned, whereas the
                  branch below stores four and returns 4 -- confirm that
                  this count is intended.  */
2396           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2397           charbuf[idx++] = -3;
2398           charbuf[idx++] = 0;
2399           new_chars = 1;
2400         }
2401       else
2402         {
               /* Fetch both values before the stores below overwrite the
                  slots they come from.  */
2403           int nchars = charbuf[idx + 1] + 0xA0;
2404           int nbytes = charbuf[idx + 2] + 0xA0;
2405
2406           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2407           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2408           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2409           charbuf[idx++] = -1;
2410           new_chars = 4;
2411         }
2412     }
2413   cmp_status->state = COMPOSING_NO;
2414   return new_chars;
2415 }
2416
     /* If a composition is in progress, finish it with
        emacs_mule_finish_composition and add the number of characters
        that function returns to `char_offset'.  Needs `cmp_status',
        `charbuf' and `char_offset' in scope.  */
2417 #define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
2418   do {                                                                    \
2419     if (cmp_status->state != COMPOSING_NO)                                \
2420       char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
2421   } while (0)
2422
2423
     /* Decode the emacs-mule encoded source of CODING into
        CODING->charbuf.

        Reading starts at coding->source + coding->consumed and output is
        appended at coding->charbuf + coding->charbuf_used.  On return
        coding->consumed, coding->consumed_char and coding->charbuf_used
        are updated; the input counts as consumed up to the start of the
        unit that was being processed when decoding stopped.

        Composition state is kept in coding->spec.emacs_mule.cmp_status
        so that a composition split across calls can be resumed: unless
        CODING_MODE_LAST_BLOCK is set, the elements of an unfinished
        composition are moved from the output into cmp_status->carryover
        on exit and copied back on the next entry.

        A sequence that cannot be decoded is emitted one byte at a time
        (an ASCII byte as is, any other via BYTE8_TO_CHAR) and counted in
        coding->errors.  */
2424 static void
2425 decode_coding_emacs_mule (struct coding_system *coding)
2426 {
2427   const unsigned char *src = coding->source + coding->consumed;
2428   const unsigned char *src_end = coding->source + coding->src_bytes;
2429   const unsigned char *src_base;
2430   int *charbuf = coding->charbuf + coding->charbuf_used;
2431   /* We may produce two annotations (charset and composition) in one
2432      loop and one more charset annotation at the end.  */
2433   int *charbuf_end
2434     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2435   int consumed_chars = 0, consumed_chars_base;
       /* NOTE(review): `multibytep' and `src_end' are barely referenced
          below; presumably ONE_MORE_BYTE reads them -- confirm.  */
2436   int multibytep = coding->src_multibyte;
2437   Lisp_Object attrs, charset_list;
2438   int char_offset = coding->produced_char;
2439   int last_offset = char_offset;
2440   int last_id = charset_ascii;
2441   int eol_crlf =
2442     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2443   int byte_after_cr = -1;
2444   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2445
2446   CODING_GET_INFO (coding, attrs, charset_list);
2447
       /* Resume a composition left unfinished by the previous call: put
          its saved elements back into the output so that the composition
          code can again reach them at negative offsets from CHARBUF.  */
2448   if (cmp_status->state != COMPOSING_NO)
2449     {
2450       int i;
2451
2452       for (i = 0; i < cmp_status->length; i++)
2453         *charbuf++ = cmp_status->carryover[i];
2454       coding->annotated = 1;
2455     }
2456
2457   while (1)
2458     {
2459       int c, id;
2460
           /* Remember where this unit starts; on exit everything before
              SRC_BASE counts as consumed.  */
2461       src_base = src;
2462       consumed_chars_base = consumed_chars;
2463
2464       if (charbuf >= charbuf_end)
2465         {
               /* Out of room.  A byte already fetched after a CR has not
                  been processed yet, so do not count it as consumed.  */
2466           if (byte_after_cr >= 0)
2467             src_base--;
2468           break;
2469         }
2470
           /* Use the byte fetched after a CR, if any, before reading on.  */
2471       if (byte_after_cr >= 0)
2472         c = byte_after_cr, byte_after_cr = -1;
2473       else
2474         ONE_MORE_BYTE (c);
2475
           /* 0x80 introduces a composition.  NOTE(review): a negative C
              is presumably how ONE_MORE_BYTE hands over a non-byte
              character of a multibyte source, negated -- confirm against
              the macro.  Either way a composition in progress ends
              here.  */
2476       if (c < 0 || c == 0x80)
2477         {
2478           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2479           if (c < 0)
2480             {
2481               *charbuf++ = -c;
2482               char_offset++;
2483             }
2484           else
2485             DECODE_EMACS_MULE_COMPOSITION_START ();
2486           continue;
2487         }
2488
2489       if (c < 0x80)
2490         {
               /* NOTE(review): nothing visible here converts CR LF; the
                  look-ahead presumably keeps a CR from being consumed
                  before the byte after it is available -- confirm against
                  ONE_MORE_BYTE.  */
2491           if (eol_crlf && c == '\r')
2492             ONE_MORE_BYTE (byte_after_cr);
2493           id = charset_ascii;
               /* An ASCII character ends an old-form composition.  In the
                  new form it is counted as one component while components
                  are being read.  */
2494           if (cmp_status->state != COMPOSING_NO)
2495             {
2496               if (cmp_status->old_form)
2497                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2498               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2499                 cmp_status->ncomps--;
2500             }
2501         }
2502       else
2503         {
2504           int nchars, nbytes;
2505           /* emacs_mule_char can load a charset map from a file, which
2506              allocates a large structure and might cause buffer text
2507              to be relocated as result.  Thus, we need to remember the
2508              original pointer to buffer text, and fixup all related
2509              pointers after the call.  */
2510           const unsigned char *orig = coding->source;
2511           EMACS_INT offset;
2512
2513           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2514                                cmp_status);
2515           offset = coding->source - orig;
2516           if (offset)
2517             {
2518               src += offset;
2519               src_base += offset;
2520               src_end += offset;
2521             }
               /* -1 means an invalid sequence and -2 stops decoding here;
                  any other negative value is handled further down.  */
2522           if (c < 0)
2523             {
2524               if (c == -1)
2525                 goto invalid_code;
2526               if (c == -2)
2527                 break;
2528             }
               /* Advance by the byte and character counts that
                  emacs_mule_char stored for this unit.  */
2529           src = src_base + nbytes;
2530           consumed_chars = consumed_chars_base + nchars;
2531           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2532             cmp_status->ncomps -= nchars;
2533         }
2534
2535       /* Now if C >= 0, we found a normally encoded character, if C <
2536          0, we found an old-style composition component character or
2537          rule.  */
2538
2539       if (cmp_status->state == COMPOSING_NO)
2540         {
               /* Plain character.  When the charset changes, close the
                  run of the previous charset with a charset annotation
                  unless that charset was ASCII.  */
2541           if (last_id != id)
2542             {
2543               if (last_id != charset_ascii)
2544                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2545                                   last_id);
2546               last_id = id;
2547               last_offset = char_offset;
2548             }
2549           *charbuf++ = c;
2550           char_offset++;
2551         }
2552       else if (cmp_status->state == COMPOSING_CHAR)
2553         {
2554           if (cmp_status->old_form)
2555             {
                   /* Old form: a normally encoded character ends the
                      composition; a negative C is a component, stored
                      with its sign restored.  */
2556               if (c >= 0)
2557                 {
2558                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2559                   *charbuf++ = c;
2560                   char_offset++;
2561                 }
2562               else
2563                 {
2564                   *charbuf++ = -c;
2565                   cmp_status->nchars++;
2566                   cmp_status->length++;
2567                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2568                     EMACS_MULE_COMPOSITION_END ();
2569                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2570                     cmp_status->state = COMPOSING_RULE;
2571                 }
2572             }
2573           else
2574             {
                   /* New form: NCHARS counts down the characters still
                      expected; the composition ends when it reaches 0.  */
2575               *charbuf++ = c;
2576               cmp_status->length++;
2577               cmp_status->nchars--;
2578               if (cmp_status->nchars == 0)
2579                 EMACS_MULE_COMPOSITION_END ();
2580             }
2581         }
2582       else if (cmp_status->state == COMPOSING_RULE)
2583         {
2584           int rule;
2585
               /* A rule is expected.  A normally encoded character ends
                  the composition instead; otherwise the rule is stored
                  after a -2 marker and a character is expected next.  */
2586           if (c >= 0)
2587             {
2588               EMACS_MULE_COMPOSITION_END ();
2589               *charbuf++ = c;
2590               char_offset++;
2591             }
2592           else
2593             {
2594               c = -c;
2595               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2596               if (rule < 0)
2597                 goto invalid_code;
2598               *charbuf++ = -2;
2599               *charbuf++ = rule;
2600               cmp_status->length += 2;
2601               cmp_status->state = COMPOSING_CHAR;
2602             }
2603         }
2604       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2605         {
               /* NCOMPS was already decreased for this character above.
                  Zero means all components are read; a negative count
                  means the data is inconsistent, so give the composition
                  up.  */
2606           *charbuf++ = c;
2607           cmp_status->length++;
2608           if (cmp_status->ncomps == 0)
2609             cmp_status->state = COMPOSING_CHAR;
2610           else if (cmp_status->ncomps > 0)
2611             {
2612               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2613                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2614             }
2615           else
2616             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2617         }
2618       else                      /* COMPOSING_COMPONENT_RULE */
2619         {
2620           int rule;
2621
2622           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2623           if (rule < 0)
2624             goto invalid_code;
2625           *charbuf++ = -2;
2626           *charbuf++ = rule;
2627           cmp_status->length += 2;
2628           cmp_status->ncomps--;
2629           if (cmp_status->ncomps > 0)
2630             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2631           else
2632             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2633         }
2634       continue;
2635
           /* NOTE(review): no `goto retry' is visible in this function --
              confirm whether a macro uses the label before removing
              it.  */
2636     retry:
2637       src = src_base;
2638       consumed_chars = consumed_chars_base;
2639       continue;
2640
2641     invalid_code:
           /* Give up any composition, go back to the start of the unit
              and pass its first byte through undecoded.  */
2642       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2643       src = src_base;
2644       consumed_chars = consumed_chars_base;
2645       ONE_MORE_BYTE (c);
2646       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2647       char_offset++;
2648       coding->errors++;
2649     }
2650
2651  no_more_source:
       /* NOTE(review): no `goto no_more_source' is visible in this
          function; ONE_MORE_BYTE presumably jumps here when the source is
          exhausted -- confirm.  */
2652   if (cmp_status->state != COMPOSING_NO)
2653     {
2654       if (coding->mode & CODING_MODE_LAST_BLOCK)
2655         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2656       else
2657         {
2658           int i;
2659
               /* Take the unfinished composition out of the output and
                  save it for the next call.  */
2660           charbuf -= cmp_status->length;
2661           for (i = 0; i < cmp_status->length; i++)
2662             cmp_status->carryover[i] = charbuf[i];
2663         }
2664     }
       /* Close the run of the last charset unless it is ASCII.  */
2665   if (last_id != charset_ascii)
2666     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2667   coding->consumed_char += consumed_chars_base;
2668   coding->consumed = src_base - coding->source;
2669   coding->charbuf_used = charbuf - coding->charbuf;
2670 }
2671
2672
/* Store in CODES (an array of at least two bytes) the leading code or
   codes that emacs-mule uses for the charset whose emacs-mule id is ID.

   An id below 0xA0 is its own single leading code, and CODES[1] is set
   to 0 to say that there is no second one.  Any other id is stored in
   CODES[1], preceded in CODES[0] by a code that depends on its range:
   0x9A for ids below 0xE0, 0x9B below 0xF0, 0x9C below 0xF5, and 0x9D
   otherwise.

   ID and CODES are evaluated more than once, so pass expressions
   without side effects.  The expansion is a single statement that does
   not supply its own semicolon, so it can be used safely as the body of
   an unbraced `if' that has an `else'.  */
#define EMACS_MULE_LEADING_CODES(id, codes)     \
  do {                                          \
    if ((id) < 0xA0)                            \
      (codes)[0] = (id), (codes)[1] = 0;        \
    else if ((id) < 0xE0)                       \
      (codes)[0] = 0x9A, (codes)[1] = (id);     \
    else if ((id) < 0xF0)                       \
      (codes)[0] = 0x9B, (codes)[1] = (id);     \
    else if ((id) < 0xF5)                       \
      (codes)[0] = 0x9C, (codes)[1] = (id);     \
    else                                        \
      (codes)[0] = 0x9D, (codes)[1] = (id);     \
  } while (0)
2686
2687
     /* Encode the contents of CODING->charbuf as emacs-mule bytes.

        The elements coding->charbuf[0] up to, but not including,
        coding->charbuf[coding->charbuf_used] are processed and the
        output goes to coding->destination + coding->produced.  At the
        end CODING_RESULT_SUCCESS is recorded, coding->produced_char and
        coding->produced are updated, and 0 is returned.

        A negative element starts an annotation.  A charset annotation
        selects a preferred charset for the characters that follow; a
        composition annotation is skipped.  Every other element is a
        character: ASCII and eight-bit characters are emitted as one
        byte, any other character as the leading code or codes of its
        charset followed by one or two code bytes with the high bit
        set.

        NOTE(review): `multibytep', `dst_end' and `produced_chars' are
        not modified by any statement visible here; presumably the
        EMIT_... and ASSURE_DESTINATION macros use them -- confirm.  */
2688 static int
2689 encode_coding_emacs_mule (struct coding_system *coding)
2690 {
2691   int multibytep = coding->dst_multibyte;
2692   int *charbuf = coding->charbuf;
2693   int *charbuf_end = charbuf + coding->charbuf_used;
2694   unsigned char *dst = coding->destination + coding->produced;
2695   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2696   int safe_room = 8;
2697   int produced_chars = 0;
2698   Lisp_Object attrs, charset_list;
2699   int c;
2700   int preferred_charset_id = -1;
2701
2702   CODING_GET_INFO (coding, attrs, charset_list);
       /* Always encode with Vemacs_mule_charset_list, and store that list
          back into the attributes when they held a different one.  */
2703   if (! EQ (charset_list, Vemacs_mule_charset_list))
2704     {
2705       CODING_ATTR_CHARSET_LIST (attrs)
2706         = charset_list = Vemacs_mule_charset_list;
2707     }
2708
2709   while (charbuf < charbuf_end)
2710     {
2711       ASSURE_DESTINATION (safe_room);
2712       c = *charbuf++;
2713
2714       if (c < 0)
2715         {
2716           /* Handle an annotation.  -C is the number of elements it
                  occupies, and the element CHARBUF now points at selects
                  its kind.  */
2717           switch (*charbuf)
2718             {
2719             case CODING_ANNOTATE_COMPOSITION_MASK:
2720               /* Not yet implemented.  */
2721               break;
2722             case CODING_ANNOTATE_CHARSET_MASK:
                   /* NOTE(review): CHARBUF was already advanced past the
                      first element, so this reads the fifth element of
                      the annotation -- confirm that ADD_CHARSET_DATA
                      stores the charset id there.  A charset that is not
                      in the list is not used as the preferred one.  */
2723               preferred_charset_id = charbuf[3];
2724               if (preferred_charset_id >= 0
2725                   && NILP (Fmemq (make_number (preferred_charset_id),
2726                                   charset_list)))
2727                 preferred_charset_id = -1;
2728               break;
2729             default:
2730               abort ();
2731             }
               /* Skip the rest of the annotation.  */
2732           charbuf += -c - 1;
2733           continue;
2734         }
2735
2736       if (ASCII_CHAR_P (c))
2737         EMIT_ONE_ASCII_BYTE (c);
2738       else if (CHAR_BYTE8_P (c))
2739         {
2740           c = CHAR_TO_BYTE8 (c);
2741           EMIT_ONE_BYTE (c);
2742         }
2743       else
2744         {
2745           struct charset *charset;
2746           unsigned code;
2747           int dimension;
2748           int emacs_mule_id;
2749           unsigned char leading_codes[2];
2750
               /* Use the preferred charset if it contains C, otherwise
                  search the charset list.  */
2751           if (preferred_charset_id >= 0)
2752             {
2753               charset = CHARSET_FROM_ID (preferred_charset_id);
2754               if (CHAR_CHARSET_P (c, charset))
2755                 code = ENCODE_CHAR (charset, c);
2756               else
2757                 charset = char_charset (c, charset_list, &code);
2758             }
2759           else
2760             charset = char_charset (c, charset_list, &code);
2761           if (! charset)
2762             {
                   /* No charset in the list contains C: encode the
                      coding system's default character instead.
                      NOTE(review): CHARSET is not checked again after the
                      second lookup; presumably the default character
                      always belongs to a charset in the list --
                      confirm.  */
2763               c = coding->default_char;
2764               if (ASCII_CHAR_P (c))
2765                 {
2766                   EMIT_ONE_ASCII_BYTE (c);
2767                   continue;
2768                 }
2769               charset = char_charset (c, charset_list, &code);
2770             }
2771           dimension = CHARSET_DIMENSION (charset);
2772           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2773           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2774           EMIT_ONE_BYTE (leading_codes[0]);
               /* A zero second leading code means there is none.  */
2775           if (leading_codes[1])
2776             EMIT_ONE_BYTE (leading_codes[1]);
               /* The code point follows in one or two bytes, each with
                  the high bit set.  */
2777           if (dimension == 1)
2778             EMIT_ONE_BYTE (code | 0x80);
2779           else
2780             {
2781               code |= 0x8080;
2782               EMIT_ONE_BYTE (code >> 8);
2783               EMIT_ONE_BYTE (code & 0xFF);
2784             }
2785         }
2786     }
2787   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2788   coding->produced_char += produced_chars;
2789   coding->produced = dst - coding->destination;
2790   return 0;
2791 }
2792
2793 \f
2794 /*** 7. ISO2022 handlers ***/
2795
2796 /* The following note describes the coding system ISO2022 briefly.
2797    Since the intention of this note is to help understand the
2798    functions in this file, some parts are NOT ACCURATE or are OVERLY
2799    SIMPLIFIED.  For thorough understanding, please refer to the
2800    original document of ISO2022.  This is equivalent to the standard
2801    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2802
2803    ISO2022 provides many mechanisms to encode several character sets
2804    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2805    is encoded using bytes less than 128.  This may make the encoded
2806    text a little bit longer, but the text passes more easily through
2807    several types of gateway, some of which strip off the MSB (Most
2808    Significant Bit).
2809
2810    There are two kinds of character sets: control character sets and
2811    graphic character sets.  The former contain control characters such
2812    as `newline' and `escape' to provide control functions (control
2813    functions are also provided by escape sequences).  The latter
2814    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2815    two control character sets and many graphic character sets.
2816
2817    Graphic character sets are classified into one of the following
2818    four classes, according to the number of bytes (DIMENSION) and
2819    number of characters in one dimension (CHARS) of the set:
2820    - DIMENSION1_CHARS94
2821    - DIMENSION1_CHARS96
2822    - DIMENSION2_CHARS94
2823    - DIMENSION2_CHARS96
2824
2825    In addition, each character set is assigned an identification tag,
2826    unique for each set, called the "final character" (denoted as <F>
2827    hereafter).  The <F> of each character set is decided by ECMA(*)
2828    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2829    (0x30..0x3F are for private use only).
2830
2831    Note (*): ECMA = European Computer Manufacturers Association
2832
2833    Here are examples of graphic character sets [NAME(<F>)]:
2834         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2835         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2836         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2837         o DIMENSION2_CHARS96 -- none for the moment
2838
2839    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2840         C0 [0x00..0x1F] -- control character plane 0
2841         GL [0x20..0x7F] -- graphic character plane 0
2842         C1 [0x80..0x9F] -- control character plane 1
2843         GR [0xA0..0xFF] -- graphic character plane 1
2844
2845    A control character set is directly designated and invoked to C0 or
2846    C1 by an escape sequence.  The most common case is that:
2847    - ISO646's  control character set is designated/invoked to C0, and
2848    - ISO6429's control character set is designated/invoked to C1,
2849    and usually these designations/invocations are omitted in encoded
2850    text.  In a 7-bit environment, only C0 can be used, and a control
2851    character for C1 is encoded by an appropriate escape sequence to
2852    fit into the environment.  All control characters for C1 are
2853    defined to have corresponding escape sequences.
2854
2855    A graphic character set is at first designated to one of four
2856    graphic registers (G0 through G3), then these graphic registers are
2857    invoked to GL or GR.  These designations and invocations can be
2858    done independently.  The most common case is that G0 is invoked to
2859    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2860    these invocations and designations are omitted in encoded text.
2861    In a 7-bit environment, only GL can be used.
2862
2863    When a graphic character set of CHARS94 is invoked to GL, codes
2864    0x20 and 0x7F of the GL area work as control characters SPACE and
2865    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2866    be used.
2867
2868    There are two ways of invocation: locking-shift and single-shift.
2869    With locking-shift, the invocation lasts until the next different
2870    invocation, whereas with single-shift, the invocation affects the
2871    following character only and doesn't affect the locking-shift
2872    state.  Invocations are done by the following control characters or
2873    escape sequences:
2874
2875    ----------------------------------------------------------------------
2876    abbrev  function                  cntrl escape seq   description
2877    ----------------------------------------------------------------------
2878    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2879    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2880    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2881    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2882    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2883    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2884    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2885    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2886    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2887    ----------------------------------------------------------------------
2888    (*) These are not used by any known coding system.
2889
2890    Control characters for these functions are defined by macros
2891    ISO_CODE_XXX in `coding.h'.
2892
2893    Designations are done by the following escape sequences:
2894    ----------------------------------------------------------------------
2895    escape sequence      description
2896    ----------------------------------------------------------------------
2897    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2898    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2899    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2900    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2901    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2902    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2903    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2904    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2905    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2906    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2907    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2908    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2909    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2910    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2911    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2912    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2913    ----------------------------------------------------------------------
2914
2915    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2916    of dimension 1, chars 94, and final character <F>, etc...
2917
2918    Note (*): Although these designations are not allowed in ISO2022,
2919    Emacs accepts them on decoding, and produces them on encoding
2920    CHARS96 character sets in a coding system which is characterized as
2921    7-bit environment, non-locking-shift, and non-single-shift.
2922
2923    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2924    '(' must be omitted.  We refer to this as "short-form" hereafter.
2925
2926    Now you may notice that there are a lot of ways of encoding the
2927    same multilingual text in ISO2022.  Actually, there exist many
2928    coding systems such as Compound Text (used in X11's inter client
2929    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2930    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2931    localized platforms), and all of these are variants of ISO2022.
2932
2933    In addition to the above, Emacs handles two more kinds of escape
2934    sequences: ISO6429's direction specification and Emacs' private
2935    sequence for specifying character composition.
2936
2937    ISO6429's direction specification takes the following form:
2938         o CSI ']'      -- end of the current direction
2939         o CSI '0' ']'  -- end of the current direction
2940         o CSI '1' ']'  -- start of left-to-right text
2941         o CSI '2' ']'  -- start of right-to-left text
2942    The control character CSI (0x9B: control sequence introducer) is
2943    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2944
2945    Character composition specification takes the following form:
2946         o ESC '0' -- start relative composition
2947         o ESC '1' -- end composition
2948         o ESC '2' -- start rule-base composition (*)
2949         o ESC '3' -- start relative composition with alternate chars  (**)
2950         o ESC '4' -- start rule-base composition with alternate chars  (**)
2951   Since these are not standard escape sequences of any ISO standard,
2952   the use of them with these meanings is restricted to Emacs only.
2953
2954   (*) This form is used only in Emacs 20.7 and older versions,
2955   but newer versions can safely decode it.
2956   (**) This form is used only in Emacs 21.1 and newer versions,
2957   and older versions can't decode it.
2958
2959   Here's a list of example usages of these composition escape
2960   sequences (categorized by `enum composition_method').
2961
2962   COMPOSITION_RELATIVE:
2963         ESC 0 CHAR [ CHAR ] ESC 1
2964   COMPOSITION_WITH_RULE:
2965         ESC 2 CHAR [ RULE CHAR ] ESC 1
2966   COMPOSITION_WITH_ALTCHARS:
2967         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2968   COMPOSITION_WITH_RULE_ALTCHARS:
2969         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2970
     /* Table of 256 ISO code classes.  NOTE(review): presumably indexed
        by byte value 0..255 -- confirm against the code that fills and
        reads it.  */
2971 enum iso_code_class_type iso_code_class[256];
2972
     /* Nonzero if the charset with id ID is usable with CODING: ID must
        not exceed CODING->max_charset_id and its entry in
        CODING->safe_charsets must not be 255, the value that
        setup_iso_safe_charsets fills the table with before recording the
        usable charsets.

        NOTE(review): there is no lower bound check, so a negative ID
        would read before the table -- confirm that every caller passes
        a non-negative id.  */
2973 #define SAFE_CHARSET_P(coding, id)      \
2974   ((id) <= (coding)->max_charset_id     \
2975    && (coding)->safe_charsets[id] != 255)
2976
2977
     /* Nonzero if CODING_ISO_INITIAL for index 1 of the coding system
        registered for CATEGORY in coding_categories is not negative.
        NOTE(review): presumably this means a charset is initially
        designated to G1, which shift-out would invoke -- confirm against
        CODING_ISO_INITIAL.  */
2978 #define SHIFT_OUT_OK(category)  \
2979   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2980
     /* Compute and cache the safe-charsets table of the ISO-2022 coding
        system whose attribute vector is ATTRS.

        The table is a string indexed by charset id.  A byte is 255 for a
        charset that cannot be used.  Otherwise it is the number found
        for the charset: the value associated with its id in the
        coding_attr_iso_request list if there is one, else the default
        from the coding_attr_iso_usage pair (the car for a charset
        without iso_chars_96, the cdr for one with it), provided that the
        default is below 4.  The table is stored in ATTRS under
        coding_attr_safe_charsets; nothing is returned.

        NOTE(review): the stored numbers are presumably the graphic
        registers G0..G3 -- confirm against the users of the table.  */
2981 static void
2982 setup_iso_safe_charsets (Lisp_Object attrs)
2983 {
2984   Lisp_Object charset_list, safe_charsets;
2985   Lisp_Object request;
2986   Lisp_Object reg_usage;
2987   Lisp_Object tail;
2988   int reg94, reg96;
2989   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2990   int max_charset_id;
2991
2992   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
       /* A coding system flagged for full support always uses
          Viso_2022_charset_list.  Switching to that list discards a table
          cached for the previous one.  */
2993   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2994       && ! EQ (charset_list, Viso_2022_charset_list))
2995     {
2996       CODING_ATTR_CHARSET_LIST (attrs)
2997         = charset_list = Viso_2022_charset_list;
2998       ASET (attrs, coding_attr_safe_charsets, Qnil);
2999     }
3000
       /* A string here is a table built earlier; keep it.  */
3001   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3002     return;
3003
       /* The table needs one byte per charset id up to the largest id in
          the list.  */
3004   max_charset_id = 0;
3005   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3006     {
3007       int id = XINT (XCAR (tail));
3008       if (max_charset_id < id)
3009         max_charset_id = id;
3010     }
3011
       /* Start with every charset marked unusable (255).  */
3012   safe_charsets = make_uninit_string (max_charset_id + 1);
3013   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
3014   request = AREF (attrs, coding_attr_iso_request);
3015   reg_usage = AREF (attrs, coding_attr_iso_usage);
3016   reg94 = XINT (XCAR (reg_usage));
3017   reg96 = XINT (XCDR (reg_usage));
3018
3019   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3020     {
3021       Lisp_Object id;
3022       Lisp_Object reg;
3023       struct charset *charset;
3024
3025       id = XCAR (tail);
3026       charset = CHARSET_FROM_ID (XINT (id));
           /* An explicit request for this charset takes precedence over
              the defaults.  A default of 4 or more leaves the charset
              marked unusable.  */
3027       reg = Fcdr (Fassq (id, request));
3028       if (! NILP (reg))
3029         SSET (safe_charsets, XINT (id), XINT (reg));
3030       else if (charset->iso_chars_96)
3031         {
3032           if (reg96 < 4)
3033             SSET (safe_charsets, XINT (id), reg96);
3034         }
3035       else
3036         {
3037           if (reg94 < 4)
3038             SSET (safe_charsets, XINT (id), reg94);
3039         }
3040     }
3041   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3042 }
3043
3044
3045 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3046    Check if a text is encoded in one of ISO-2022 based coding systems.
3047    If it is, return 1, else return 0.  */
3048
3049 static int
3050 detect_coding_iso_2022 (struct coding_system *coding,
3051                         struct coding_detection_info *detect_info)
3052 {
3053   const unsigned char *src = coding->source, *src_base = src;
3054   const unsigned char *src_end = coding->source + coding->src_bytes;
3055   int multibytep = coding->src_multibyte;
3056   int single_shifting = 0;
3057   int id;
3058   int c, c1;
3059   int consumed_chars = 0;
3060   int i;
3061   int rejected = 0;
3062   int found = 0;
3063   int composition_count = -1;
3064
3065   detect_info->checked |= CATEGORY_MASK_ISO;
3066
3067   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3068     {
3069       struct coding_system *this = &(coding_categories[i]);
3070       Lisp_Object attrs, val;
3071
3072       if (this->id < 0)
3073         continue;
3074       attrs = CODING_ID_ATTRS (this->id);
3075       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3076           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3077         setup_iso_safe_charsets (attrs);
3078       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3079       this->max_charset_id = SCHARS (val) - 1;
3080       this->safe_charsets = SDATA (val);
3081     }
3082
3083   /* A coding system of this category is always ASCII compatible.  */
3084   src += coding->head_ascii;
3085
3086   while (rejected != CATEGORY_MASK_ISO)
3087     {
3088       src_base = src;
3089       ONE_MORE_BYTE (c);
3090       switch (c)
3091         {
3092         case ISO_CODE_ESC:
3093           if (inhibit_iso_escape_detection)
3094             break;
3095           single_shifting = 0;
3096           ONE_MORE_BYTE (c);
3097           if (c >= '(' && c <= '/')
3098             {
3099               /* Designation sequence for a charset of dimension 1.  */
3100               ONE_MORE_BYTE (c1);
3101               if (c1 < ' ' || c1 >= 0x80
3102                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3103                 /* Invalid designation sequence.  Just ignore.  */
3104                 break;
3105             }
3106           else if (c == '$')
3107             {
3108               /* Designation sequence for a charset of dimension 2.  */
3109               ONE_MORE_BYTE (c);
3110               if (c >= '@' && c <= 'B')
3111                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3112                 id = iso_charset_table[1][0][c];
3113               else if (c >= '(' && c <= '/')
3114                 {
3115                   ONE_MORE_BYTE (c1);
3116                   if (c1 < ' ' || c1 >= 0x80
3117                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3118                     /* Invalid designation sequence.  Just ignore.  */
3119                     break;
3120                 }
3121               else
3122                 /* Invalid designation sequence.  Just ignore it.  */
3123                 break;
3124             }
3125           else if (c == 'N' || c == 'O')
3126             {
3127               /* ESC <Fe> for SS2 or SS3.  */
3128               single_shifting = 1;
3129               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3130               break;
3131             }
3132           else if (c == '1')
3133             {
3134               /* End of composition.  */
3135               if (composition_count < 0
3136                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3137                 /* Invalid */
3138                 break;
3139               composition_count = -1;
3140               found |= CATEGORY_MASK_ISO;
3141             }
3142           else if (c >= '0' && c <= '4')
3143             {
3144               /* ESC <Fp> for start/end composition.  */
3145               composition_count = 0;
3146               break;
3147             }
3148           else
3149             {
3150               /* Invalid escape sequence.  Just ignore it.  */
3151               break;
3152             }
3153
3154           /* We found a valid designation sequence for CHARSET.  */
3155           rejected |= CATEGORY_MASK_ISO_8BIT;
3156           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3157                               id))
3158             found |= CATEGORY_MASK_ISO_7;
3159           else
3160             rejected |= CATEGORY_MASK_ISO_7;
3161           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3162                               id))
3163             found |= CATEGORY_MASK_ISO_7_TIGHT;
3164           else
3165             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3166           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3167                               id))
3168             found |= CATEGORY_MASK_ISO_7_ELSE;
3169           else
3170             rejected |= CATEGORY_MASK_ISO_7_ELSE;
3171           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3172                               id))
3173             found |= CATEGORY_MASK_ISO_8_ELSE;
3174           else
3175             rejected |= CATEGORY_MASK_ISO_8_ELSE;
3176           break;
3177
3178         case ISO_CODE_SO:
3179         case ISO_CODE_SI:
3180           /* Locking shift out/in.  */
3181           if (inhibit_iso_escape_detection)
3182             break;
3183           single_shifting = 0;
3184           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3185           break;
3186
3187         case ISO_CODE_CSI:
3188           /* Control sequence introducer.  */
3189           single_shifting = 0;
3190           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3191           found |= CATEGORY_MASK_ISO_8_ELSE;
3192           goto check_extra_latin;
3193
3194         case ISO_CODE_SS2:
3195         case ISO_CODE_SS3:
3196           /* Single shift.   */
3197           if (inhibit_iso_escape_detection)
3198             break;
3199           single_shifting = 0;
3200           rejected |= CATEGORY_MASK_ISO_7BIT;
3201           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3202               & CODING_ISO_FLAG_SINGLE_SHIFT)
3203             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
3204           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3205               & CODING_ISO_FLAG_SINGLE_SHIFT)
3206             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3207           if (single_shifting)
3208             break;
3209           goto check_extra_latin;
3210
3211         default:
3212           if (c < 0)
3213             continue;
3214           if (c < 0x80)
3215             {
3216               if (composition_count >= 0)
3217                 composition_count++;
3218               single_shifting = 0;
3219               break;
3220             }
3221           if (c >= 0xA0)
3222             {
3223               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3224               found |= CATEGORY_MASK_ISO_8_1;
3225               /* Check the length of succeeding codes of the range
3226                  0xA0..0xFF.  If the byte length is even, we include
3227                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3228                  only when we are not single shifting.  */
3229               if (! single_shifting
3230                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3231                 {
3232                   int i = 1;
3233                   while (src < src_end)
3234                     {
3235                       src_base = src;
3236                       ONE_MORE_BYTE (c);
3237                       if (c < 0xA0)
3238                         {
3239                           src = src_base;
3240                           break;
3241                         }
3242                       i++;
3243                     }
3244
3245                   if (i & 1 && src < src_end)
3246                     {
3247                       rejected |= CATEGORY_MASK_ISO_8_2;
3248                       if (composition_count >= 0)
3249                         composition_count += i;
3250                     }
3251                   else
3252                     {
3253                       found |= CATEGORY_MASK_ISO_8_2;
3254                       if (composition_count >= 0)
3255                         composition_count += i / 2;
3256                     }
3257                 }
3258               break;
3259             }
3260         check_extra_latin:
3261           single_shifting = 0;
3262           if (! VECTORP (Vlatin_extra_code_table)
3263               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3264             {
3265               rejected = CATEGORY_MASK_ISO;
3266               break;
3267             }
3268           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3269               & CODING_ISO_FLAG_LATIN_EXTRA)
3270             found |= CATEGORY_MASK_ISO_8_1;
3271           else
3272             rejected |= CATEGORY_MASK_ISO_8_1;
3273           rejected |= CATEGORY_MASK_ISO_8_2;
3274         }
3275     }
3276   detect_info->rejected |= CATEGORY_MASK_ISO;
3277   return 0;
3278
3279  no_more_source:
3280   detect_info->rejected |= rejected;
3281   detect_info->found |= (found & ~rejected);
3282   return 1;
3283 }
3284
3285
3286 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
3287    escape sequence should be kept.

        REG is the designation register being set; DIM, CHARS_96 and
        FINAL are handed to ISO_CHARSET_TABLE to look up the charset ID.
        CHARS_96 must be an lvalue because it doubles as the "keep the
        sequence" output, and `coding' is taken from the scope in which
        the macro is expanded.

        When FINAL is outside '0'..127, the lookup yields a negative ID,
        or the charset is not safe for CODING, REG is marked with -2,
        CHARS_96 becomes -1, and nothing else is done.  Otherwise the ID
        is stored as the designation of REG, after replacing
        JISX0201-Roman by ASCII under CODING_ISO_FLAG_USE_ROMAN and
        JISX0208.1978 by JISX0208 under CODING_ISO_FLAG_USE_OLDJIS.  */
3288 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3289   do {                                                                  \
3290     int id, prev;                                                       \
3291                                                                         \
3292     if (final < '0' || final >= 128                                     \
3293         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
3294         || !SAFE_CHARSET_P (coding, id))                                \
3295       {                                                                 \
         /* -2 marks REG as holding an invalid designation; it is   */   \
         /* tested again below through PREV.  The `break' leaves    */   \
         /* the do-while, i.e. the macro.                           */   \
3296         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
3297         chars_96 = -1;                                                  \
3298         break;                                                          \
3299       }                                                                 \
3300     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
3301     if (id == charset_jisx0201_roman)                                   \
3302       {                                                                 \
3303         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
3304           id = charset_ascii;                                           \
3305       }                                                                 \
3306     else if (id == charset_jisx0208_1978)                               \
3307       {                                                                 \
3308         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3309           id = charset_jisx0208;                                        \
3310       }                                                                 \
3311     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3312     /* If there was an invalid designation to REG previously, and this  \
3313        designation is ASCII to REG, we should keep this designation     \
3314        sequence.  */                                                    \
3315     if (prev == -2 && id == charset_ascii)                              \
3316       chars_96 = -1;                                                    \
3317   } while (0)
3318
3319
3320 /* Handle these composition sequences (ALT: alternate char):
3321
3322    (1) relative composition: ESC 0 CHAR ... ESC 1
3323    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3324    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3325    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3326
3327    When the start sequence (ESC 0/2/3/4) is found, this annotation
3328    header is produced.
3329
3330         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3331
3332    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3333    produced until the end sequence (ESC 1) is found:
3334
3335    (1) CHAR ... CHAR
3336    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3337    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3338    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3339
3340    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3341    annotation header are updated as below:
3342
3343    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3344    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3345    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3346    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3347
3348    If an error is found while composing, the annotation header is
3349    changed to:
3350
3351         [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3352
3353    and the sequence [ -2 DECODED-RULE ] is changed to the original
3354    byte sequence as below:
3355         o the original byte sequence is B: [ B -1 ]
3356         o the original byte sequence is B1 B2: [ B1 B2 ]
3357    and the sequence [ -1 -1 ] is changed to the original byte
3358    sequence:
3359         [ ESC '0' ]
3360 */
3361
3362 /* Decode a composition rule C1 and maybe one more byte from the
3363    source, and set RULE to the encoded composition rule, NBYTES to the
3364    length of the composition rule.  If the rule is invalid, set RULE
3365    to some negative value.

        C1 is not a parameter: it is read from the scope in which the
        macro is expanded.  When C1 is below 32, RULE is left negative
        and NBYTES is not assigned at all, so callers must test RULE
        before looking at NBYTES.

        A first byte in 32..112 is a complete one-byte rule (old
        format).  Anything larger is the first byte of a two-byte rule
        (new format), and the second byte is fetched with
        ONE_MORE_BYTE.  */
3366
3367 #define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
3368   do {                                                                  \
3369     rule = c1 - 32;                                                     \
3370     if (rule < 0)                                                       \
3371       break;                                                            \
3372     if (rule < 81)              /* old format (before ver.21) */        \
3373       {                                                                 \
         /* Old format packs two reference points in base 9; the    */   \
         /* value 4 of either one is remapped to 10.                */   \
3374         int gref = (rule) / 9;                                          \
3375         int nref = (rule) % 9;                                          \
3376         if (gref == 4) gref = 10;                                       \
3377         if (nref == 4) nref = 10;                                       \
3378         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
3379         nbytes = 1;                                                     \
3380       }                                                                 \
3381     else                        /* new format (after ver.21) */         \
3382       {                                                                 \
3383         int c;                                                          \
3384                                                                         \
3385         ONE_MORE_BYTE (c);                                              \
3386         rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32);             \
3387         if (rule >= 0)                                                  \
3388           rule += 0x100;   /* to distinguish it from the old format */  \
3389         nbytes = 2;                                                     \
3390       }                                                                 \
3391   } while (0)
3392
/* Turn the encoded composition rule RULE back into the byte(s) it was
   decoded from, and write them into the two slots charbuf[idx] and
   charbuf[idx + 1].  `charbuf', `idx' and `new_chars' are taken from
   the scope in which the macro is expanded.

   A RULE below 0x100 is an old-format rule: one byte is written,
   followed by -1 in the second slot, and new_chars grows by 1.
   Otherwise it is a new-format rule: two bytes are written and
   new_chars grows by 2.

   The reference points and the format test are computed from RULE
   before either slot is written, so RULE may be charbuf[idx + 1]
   itself.  */
3393 #define ENCODE_COMPOSITION_RULE(rule)                           \
3394   do {                                                          \
3395     int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3396                                                                 \
3397     if (rule < 0x100)           /* old format */                \
3398       {                                                         \
         /* Undo the 4 -> 10 remapping done when decoding.  */   \
3399         if (gref == 10) gref = 4;                               \
3400         if (nref == 10) nref = 4;                               \
3401         charbuf[idx] = 32 + gref * 9 + nref;                    \
3402         charbuf[idx + 1] = -1;                                  \
3403         new_chars++;                                            \
3404       }                                                         \
3405     else                                /* new format */        \
3406       {                                                         \
3407         charbuf[idx] = 32 + 81 + gref;                          \
3408         charbuf[idx + 1] = 32 + nref;                           \
3409         new_chars += 2;                                         \
3410       }                                                         \
3411   } while (0)
3412
3413 /* Finish the current composition as invalid.

        CHARBUF points just past the data produced so far for the
        composition described by CMP_STATUS; the annotation header
        therefore starts at CHARBUF[-CMP_STATUS->length].  The five
        header slots are overwritten in place with the original start
        sequence (ESC and the method character) followed by -2, 0, -1.
        For every method other than the relative one, each stored
        [ -2 RULE ] pair is turned back into its source byte(s) and the
        [ -1 -1 ] separator into ESC '0'.

        CMP_STATUS->state is reset to COMPOSING_NO.  The return value is
        CMP_STATUS->nchars plus the number of rule and separator bytes
        restored by the loop.  */
3414
3415 static int finish_composition (int *, struct composition_status *);
3416
3417 static int
3418 finish_composition (int *charbuf, struct composition_status *cmp_status)
3419 {
       /* Negative index of the annotation header relative to CHARBUF.  */
3420   int idx = - cmp_status->length;
3421   int new_chars;
3422
3423   /* Recover the original ESC sequence */
3424   charbuf[idx++] = ISO_CODE_ESC;
3425   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3426                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3427                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3428                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3429                     : '4');
       /* Fill the remaining three header slots.  NOTE(review): these
          negative values are presumably skipped as annotation filler by
          whatever consumes CHARBUF -- confirm against the consumer.  */
3430   charbuf[idx++] = -2;
3431   charbuf[idx++] = 0;
3432   charbuf[idx++] = -1;
3433   new_chars = cmp_status->nchars;
       /* Only methods that can have stored rules or an ALT/CHAR separator
          need the scan.  NOTE(review): this assumes COMPOSITION_RELATIVE
          is the only method ordered below COMPOSITION_WITH_RULE.  */
3434   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3435     for (; idx < 0; idx++)
3436       {
3437         int elt = charbuf[idx];
3438
3439         if (elt == -2)
3440           {
                 /* [ -2 RULE ]: rewrite both slots, then step over the
                    second one so the loop does not re-examine it.  */
3441             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3442             idx++;
3443           }
3444         else if (elt == -1)
3445           {
                 /* [ -1 -1 ]: both slots become ESC '0'; the second -1
                    is overwritten here and never seen by the loop.  */
3446             charbuf[idx++] = ISO_CODE_ESC;
3447             charbuf[idx] = '0';
3448             new_chars += 2;
3449           }
3450       }
3451   cmp_status->state = COMPOSING_NO;
3452   return new_chars;
3453 }
3454
3455 /* If characters are under composition, finish the composition as
        invalid and add the count returned by finish_composition to
        char_offset.  Does nothing when the state is COMPOSING_NO.
        `cmp_status', `charbuf' and `char_offset' are taken from the
        scope in which the macro is expanded.  */
3456 #define MAYBE_FINISH_COMPOSITION()                              \
3457   do {                                                          \
3458     if (cmp_status->state != COMPOSING_NO)                      \
3459       char_offset += finish_composition (charbuf, cmp_status);  \
3460   } while (0)
3461
3462 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3463
3464    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3465    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3466    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3467    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3468
3469    Produce this annotation sequence now:
3470
3471    [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]

        This is the five-slot header that finish_composition overwrites
        when a composition is abandoned.  NOTE(review): the value 5
        assumes MAX_ANNOTATION_LENGTH is 5 -- confirm against its
        definition.

        C1 is the byte following ESC.  An ESC 0 that arrives while the
        ALT components of an ESC 3 / ESC 4 composition are being read
        is not a new start: it only separates the ALTs from the CHARs,
        and is recorded as [ -1 -1 ].  Any other start first finishes a
        pending composition as invalid.

        `cmp_status', `charbuf', `char_offset' and `coding' are taken
        from the scope in which the macro is expanded.
3472 */
3473
3474 #define DECODE_COMPOSITION_START(c1)                                       \
3475   do {                                                                     \
3476     if (c1 == '0'                                                          \
3477         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
3478              && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
3479             || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
3480                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3481       {                                                                    \
         /* ALT/CHAR separator: store [ -1 -1 ] and switch to CHARs.  */    \
3482         *charbuf++ = -1;                                                   \
3483         *charbuf++= -1;                                                    \
3484         cmp_status->state = COMPOSING_CHAR;                                \
3485         cmp_status->length += 2;                                           \
3486       }                                                                    \
3487     else                                                                   \
3488       {                                                                    \
3489         MAYBE_FINISH_COMPOSITION ();                                       \
3490         cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE             \
3491                               : c1 == '2' ? COMPOSITION_WITH_RULE          \
3492                               : c1 == '3' ? COMPOSITION_WITH_ALTCHARS      \
3493                               : COMPOSITION_WITH_RULE_ALTCHARS);           \
         /* ESC 0 and ESC 2 begin with CHARs; ESC 3 and ESC 4 begin   */    \
         /* with ALT components.                                      */    \
3494         cmp_status->state                                                  \
3495           = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);       \
3496         ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
3497         cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
3498         cmp_status->nchars = cmp_status->ncomps = 0;                       \
3499         coding->annotated = 1;                                             \
3500       }                                                                    \
3501   } while (0)
3502
3503
3504 /* Handle composition end sequence ESC 1.

        The sequence is rejected when no CHAR has been read, or when
        "state is COMPOSING_CHAR" and "method is COMPOSITION_WITH_RULE"
        are both true or both false.  That is: a rule-based composition
        ending where a CHAR is expected, or any other method ending
        while not in the COMPOSING_CHAR state.  A rejected composition
        is finished as invalid and control jumps to the label
        `invalid_code', which must exist at the expansion site.

        Otherwise the annotation header at charbuf[-length] is
        completed: for the two ALT methods the -LENGTH slot is lowered
        by ncomps + 2 or ncomps * 3 (i.e. LENGTH grows by that amount),
        the NCHARS slot (header index 2) receives nchars, char_offset
        advances by nchars, and the state returns to COMPOSING_NO.

        `cmp_status', `charbuf' and `char_offset' are taken from the
        scope in which the macro is expanded.  */
3505
3506 #define DECODE_COMPOSITION_END()                                        \
3507   do {                                                                  \
3508     if (cmp_status->nchars == 0                                         \
3509         || ((cmp_status->state == COMPOSING_CHAR)                       \
3510             == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
3511       {                                                                 \
3512         MAYBE_FINISH_COMPOSITION ();                                    \
3513         goto invalid_code;                                              \
3514       }                                                                 \
3515     if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
3516       charbuf[- cmp_status->length] -= cmp_status->ncomps + 2;          \
3517     else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
3518       charbuf[- cmp_status->length] -= cmp_status->ncomps * 3;          \
3519     charbuf[- cmp_status->length + 2] = cmp_status->nchars;             \
3520     char_offset += cmp_status->nchars;                                  \
3521     cmp_status->state = COMPOSING_NO;                                   \
3522   } while (0)
3523
3524 /* Store a composition rule RULE in charbuf, and update cmp_status.

        The rule is stored as the pair [ -2 RULE ], so the recorded
        length grows by 2.  The state is then decremented.
        NOTE(review): this presumably moves from the rule-expecting
        state back to the matching char-expecting one, which relies on
        those state constants being adjacent -- confirm against the
        enum.  `charbuf' and `cmp_status' are taken from the scope in
        which the macro is expanded.  */
3525
3526 #define STORE_COMPOSITION_RULE(rule)    \
3527   do {                                  \
3528     *charbuf++ = -2;                    \
3529     *charbuf++ = rule;                  \
3530     cmp_status->length += 2;            \
3531     cmp_status->state--;                \
3532   } while (0)
3533
3534 /* Store a composed char or a component char C in charbuf, and update
3535    cmp_status.

        The recorded length grows by 1.  C counts towards nchars when
        the state is COMPOSING_CHAR and towards ncomps otherwise.  When
        a rule must follow the character (method COMPOSITION_WITH_RULE,
        or an ALT component of COMPOSITION_WITH_RULE_ALTCHARS), the
        state is incremented.  NOTE(review): this presumably selects the
        matching rule-expecting state, relying on the order of the state
        constants -- confirm against the enum.  `charbuf' and
        `cmp_status' are taken from the scope in which the macro is
        expanded.  */
3536
3537 #define STORE_COMPOSITION_CHAR(c)                                       \
3538   do {                                                                  \
3539     *charbuf++ = (c);                                                   \
3540     cmp_status->length++;                                               \
3541     if (cmp_status->state == COMPOSING_CHAR)                            \
3542       cmp_status->nchars++;                                             \
3543     else                                                                \
3544       cmp_status->ncomps++;                                             \
3545     if (cmp_status->method == COMPOSITION_WITH_RULE                     \
3546         || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS        \
3547             && cmp_status->state == COMPOSING_COMPONENT_CHAR))          \
3548       cmp_status->state++;                                              \
3549   } while (0)
3550
3551
3552 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3553
3554 static void
3555 decode_coding_iso_2022 (struct coding_system *coding)
3556 {
3557   const unsigned char *src = coding->source + coding->consumed;
3558   const unsigned char *src_end = coding->source + coding->src_bytes;
3559   const unsigned char *src_base;
3560   int *charbuf = coding->charbuf + coding->charbuf_used;
3561   /* We may produce two annotations (charset and composition) in one
3562      loop and one more charset annotation at the end.  */
3563   int *charbuf_end
3564     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3565   int consumed_chars = 0, consumed_chars_base;
3566   int multibytep = coding->src_multibyte;
3567   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3568   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3569   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3570   int charset_id_2, charset_id_3;
3571   struct charset *charset;
3572   int c;
3573   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3574   Lisp_Object attrs, charset_list;
3575   int char_offset = coding->produced_char;
3576   int last_offset = char_offset;
3577   int last_id = charset_ascii;
3578   int eol_crlf =
3579     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3580   int byte_after_cr = -1;
3581   int i;
3582
3583   CODING_GET_INFO (coding, attrs, charset_list);
3584   setup_iso_safe_charsets (attrs);
3585   /* Charset list may have been changed.  */
3586   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3587   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3588
3589   if (cmp_status->state != COMPOSING_NO)
3590     {
3591       for (i = 0; i < cmp_status->length; i++)
3592         *charbuf++ = cmp_status->carryover[i];
3593       coding->annotated = 1;
3594     }
3595
3596   while (1)
3597     {
3598       int c1, c2, c3;
3599
3600       src_base = src;
3601       consumed_chars_base = consumed_chars;
3602
3603       if (charbuf >= charbuf_end)
3604         {
3605           if (byte_after_cr >= 0)
3606             src_base--;
3607           break;
3608         }
3609
3610       if (byte_after_cr >= 0)
3611         c1 = byte_after_cr, byte_after_cr = -1;
3612       else
3613         ONE_MORE_BYTE (c1);
3614       if (c1 < 0)
3615         goto invalid_code;
3616
3617       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3618         {
3619           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3620           char_offset++;
3621           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3622           continue;
3623         }
3624
3625       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3626         {
3627           if (c1 == ISO_CODE_ESC)
3628             {
3629               if (src + 1 >= src_end)
3630                 goto no_more_source;
3631               *charbuf++ = ISO_CODE_ESC;
3632               char_offset++;
3633               if (src[0] == '%' && src[1] == '@')
3634                 {
3635                   src += 2;
3636                   consumed_chars += 2;
3637                   char_offset += 2;
3638                   /* We are sure charbuf can contain two more chars. */
3639                   *charbuf++ = '%';
3640                   *charbuf++ = '@';
3641                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3642                 }
3643             }
3644           else
3645             {
3646               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3647               char_offset++;
3648             }
3649           continue;
3650         }
3651
3652       if ((cmp_status->state == COMPOSING_RULE
3653            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3654           && c1 != ISO_CODE_ESC)
3655         {
3656           int rule, nbytes;
3657
3658           DECODE_COMPOSITION_RULE (rule, nbytes);
3659           if (rule < 0)
3660             goto invalid_code;
3661           STORE_COMPOSITION_RULE (rule);
3662           continue;
3663         }
3664
3665       /* We produce at most one character.  */
3666       switch (iso_code_class [c1])
3667         {
3668         case ISO_0x20_or_0x7F:
3669           if (charset_id_0 < 0
3670               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3671             /* This is SPACE or DEL.  */
3672             charset = CHARSET_FROM_ID (charset_ascii);
3673           else
3674             charset = CHARSET_FROM_ID (charset_id_0);
3675           break;
3676
3677         case ISO_graphic_plane_0:
3678           if (charset_id_0 < 0)
3679             charset = CHARSET_FROM_ID (charset_ascii);
3680           else
3681             charset = CHARSET_FROM_ID (charset_id_0);
3682           break;
3683
3684         case ISO_0xA0_or_0xFF:
3685           if (charset_id_1 < 0
3686               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3687               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3688             goto invalid_code;
3689           /* This is a graphic character, we fall down ... */
3690
3691         case ISO_graphic_plane_1:
3692           if (charset_id_1 < 0)
3693             goto invalid_code;
3694           charset = CHARSET_FROM_ID (charset_id_1);
3695           break;
3696
3697         case ISO_control_0:
3698           if (eol_crlf && c1 == '\r')
3699             ONE_MORE_BYTE (byte_after_cr);
3700           MAYBE_FINISH_COMPOSITION ();
3701           charset = CHARSET_FROM_ID (charset_ascii);
3702           break;
3703
3704         case ISO_control_1:
3705           goto invalid_code;
3706
3707         case ISO_shift_out:
3708           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3709               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3710             goto invalid_code;
3711           CODING_ISO_INVOCATION (coding, 0) = 1;
3712           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3713           continue;
3714
3715         case ISO_shift_in:
3716           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3717             goto invalid_code;
3718           CODING_ISO_INVOCATION (coding, 0) = 0;
3719           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3720           continue;
3721
3722         case ISO_single_shift_2_7:
3723           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3724             goto invalid_code;
3725         case ISO_single_shift_2:
3726           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3727             goto invalid_code;
3728           /* SS2 is handled as an escape sequence of ESC 'N' */
3729           c1 = 'N';
3730           goto label_escape_sequence;
3731
3732         case ISO_single_shift_3:
3733           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3734             goto invalid_code;
3735           /* SS3 is handled as an escape sequence of ESC 'O' */
3736           c1 = 'O';
3737           goto label_escape_sequence;
3738
3739         case ISO_control_sequence_introducer:
3740           /* CSI is handled as an escape sequence of ESC '[' ...  */
3741           c1 = '[';
3742           goto label_escape_sequence;
3743
3744         case ISO_escape:
3745           ONE_MORE_BYTE (c1);
3746         label_escape_sequence:
3747           /* Escape sequences handled here are invocation,
3748              designation, direction specification, and character
3749              composition specification.  */
3750           switch (c1)
3751             {
3752             case '&':           /* revision of following character set */
3753               ONE_MORE_BYTE (c1);
3754               if (!(c1 >= '@' && c1 <= '~'))
3755                 goto invalid_code;
3756               ONE_MORE_BYTE (c1);
3757               if (c1 != ISO_CODE_ESC)
3758                 goto invalid_code;
3759               ONE_MORE_BYTE (c1);
3760               goto label_escape_sequence;
3761
3762             case '$':           /* designation of 2-byte character set */
3763               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3764                 goto invalid_code;
3765               {
3766                 int reg, chars96;
3767
3768                 ONE_MORE_BYTE (c1);
3769                 if (c1 >= '@' && c1 <= 'B')
3770                   {     /* designation of JISX0208.1978, GB2312.1980,
3771                            or JISX0208.1980 */
3772                     reg = 0, chars96 = 0;
3773                   }
3774                 else if (c1 >= 0x28 && c1 <= 0x2B)
3775                   { /* designation of DIMENSION2_CHARS94 character set */
3776                     reg = c1 - 0x28, chars96 = 0;
3777                     ONE_MORE_BYTE (c1);
3778                   }
3779                 else if (c1 >= 0x2C && c1 <= 0x2F)
3780                   { /* designation of DIMENSION2_CHARS96 character set */
3781                     reg = c1 - 0x2C, chars96 = 1;
3782                     ONE_MORE_BYTE (c1);
3783                   }
3784                 else
3785                   goto invalid_code;
3786                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3787                 /* We must update these variables now.  */
3788                 if (reg == 0)
3789                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3790                 else if (reg == 1)
3791                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3792                 if (chars96 < 0)
3793                   goto invalid_code;
3794               }
3795               continue;
3796
3797             case 'n':           /* invocation of locking-shift-2 */
3798               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3799                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3800                 goto invalid_code;
3801               CODING_ISO_INVOCATION (coding, 0) = 2;
3802               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3803               continue;
3804
3805             case 'o':           /* invocation of locking-shift-3 */
3806               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3807                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3808                 goto invalid_code;
3809               CODING_ISO_INVOCATION (coding, 0) = 3;
3810               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3811               continue;
3812
3813             case 'N':           /* invocation of single-shift-2 */
3814               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3815                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3816                 goto invalid_code;
3817               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3818               if (charset_id_2 < 0)
3819                 charset = CHARSET_FROM_ID (charset_ascii);
3820               else
3821                 charset = CHARSET_FROM_ID (charset_id_2);
3822               ONE_MORE_BYTE (c1);
3823               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3824                 goto invalid_code;
3825               break;
3826
3827             case 'O':           /* invocation of single-shift-3 */
3828               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3829                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3830                 goto invalid_code;
3831               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3832               if (charset_id_3 < 0)
3833                 charset = CHARSET_FROM_ID (charset_ascii);
3834               else
3835                 charset = CHARSET_FROM_ID (charset_id_3);
3836               ONE_MORE_BYTE (c1);
3837               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3838                 goto invalid_code;
3839               break;
3840
3841             case '0': case '2': case '3': case '4': /* start composition */
3842               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3843                 goto invalid_code;
3844               if (last_id != charset_ascii)
3845                 {
3846                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3847                   last_id = charset_ascii;
3848                   last_offset = char_offset;
3849                 }
3850               DECODE_COMPOSITION_START (c1);
3851               continue;
3852
3853             case '1':           /* end composition */
3854               if (cmp_status->state == COMPOSING_NO)
3855                 goto invalid_code;
3856               DECODE_COMPOSITION_END ();
3857               continue;
3858
3859             case '[':           /* specification of direction */
3860               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3861                 goto invalid_code;
3862               /* For the moment, nested direction is not supported.
3863                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3864                  left-to-right, and nozero means right-to-left.  */
3865               ONE_MORE_BYTE (c1);
3866               switch (c1)
3867                 {
3868                 case ']':       /* end of the current direction */
3869                   coding->mode &= ~CODING_MODE_DIRECTION;
3870
3871                 case '0':       /* end of the current direction */
3872                 case '1':       /* start of left-to-right direction */
3873                   ONE_MORE_BYTE (c1);
3874                   if (c1 == ']')
3875                     coding->mode &= ~CODING_MODE_DIRECTION;
3876                   else
3877                     goto invalid_code;
3878                   break;
3879
3880                 case '2':       /* start of right-to-left direction */
3881                   ONE_MORE_BYTE (c1);
3882                   if (c1 == ']')
3883                     coding->mode |= CODING_MODE_DIRECTION;
3884                   else
3885                     goto invalid_code;
3886                   break;
3887
3888                 default:
3889                   goto invalid_code;
3890                 }
3891               continue;
3892
3893             case '%':
3894               ONE_MORE_BYTE (c1);
3895               if (c1 == '/')
3896                 {
3897                   /* CTEXT extended segment:
3898                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3899                      We keep these bytes as is for the moment.
3900                      They may be decoded by post-read-conversion.  */
3901                   int dim, M, L;
3902                   int size;
3903
3904                   ONE_MORE_BYTE (dim);
3905                   if (dim < 0 || dim > 4)
3906                     goto invalid_code;
3907                   ONE_MORE_BYTE (M);
3908                   if (M < 128)
3909                     goto invalid_code;
3910                   ONE_MORE_BYTE (L);
3911                   if (L < 128)
3912                     goto invalid_code;
3913                   size = ((M - 128) * 128) + (L - 128);
3914                   if (charbuf + 6 > charbuf_end)
3915                     goto break_loop;
3916                   *charbuf++ = ISO_CODE_ESC;
3917                   *charbuf++ = '%';
3918                   *charbuf++ = '/';
3919                   *charbuf++ = dim;
3920                   *charbuf++ = BYTE8_TO_CHAR (M);
3921                   *charbuf++ = BYTE8_TO_CHAR (L);
3922                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3923                 }
3924               else if (c1 == 'G')
3925                 {
3926                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3927                      ESC % G --UTF-8-BYTES-- ESC % @
3928                      We keep these bytes as is for the moment.
3929                      They may be decoded by post-read-conversion.  */
3930                   if (charbuf + 3 > charbuf_end)
3931                     goto break_loop;
3932                   *charbuf++ = ISO_CODE_ESC;
3933                   *charbuf++ = '%';
3934                   *charbuf++ = 'G';
3935                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3936                 }
3937               else
3938                 goto invalid_code;
3939               continue;
3940               break;
3941
3942             default:
3943               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3944                 goto invalid_code;
3945               {
3946                 int reg, chars96;
3947
3948                 if (c1 >= 0x28 && c1 <= 0x2B)
3949                   { /* designation of DIMENSION1_CHARS94 character set */
3950                     reg = c1 - 0x28, chars96 = 0;
3951                     ONE_MORE_BYTE (c1);
3952                   }
3953                 else if (c1 >= 0x2C && c1 <= 0x2F)
3954                   { /* designation of DIMENSION1_CHARS96 character set */
3955                     reg = c1 - 0x2C, chars96 = 1;
3956                     ONE_MORE_BYTE (c1);
3957                   }
3958                 else
3959                   goto invalid_code;
3960                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3961                 /* We must update these variables now.  */
3962                 if (reg == 0)
3963                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3964                 else if (reg == 1)
3965                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3966                 if (chars96 < 0)
3967                   goto invalid_code;
3968               }
3969               continue;
3970             }
3971         }
3972
3973       if (cmp_status->state == COMPOSING_NO
3974           && charset->id != charset_ascii
3975           && last_id != charset->id)
3976         {
3977           if (last_id != charset_ascii)
3978             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3979           last_id = charset->id;
3980           last_offset = char_offset;
3981         }
3982
3983       /* Now we know CHARSET and 1st position code C1 of a character.
3984          Produce a decoded character while getting 2nd and 3rd
3985          position codes C2, C3 if necessary.  */
3986       if (CHARSET_DIMENSION (charset) > 1)
3987         {
3988           ONE_MORE_BYTE (c2);
3989           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3990               || ((c1 & 0x80) != (c2 & 0x80)))
3991             /* C2 is not in a valid range.  */
3992             goto invalid_code;
3993           if (CHARSET_DIMENSION (charset) == 2)
3994             c1 = (c1 << 8) | c2;
3995           else
3996             {
3997               ONE_MORE_BYTE (c3);
3998               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
3999                   || ((c1 & 0x80) != (c3 & 0x80)))
4000                 /* C3 is not in a valid range.  */
4001                 goto invalid_code;
4002               c1 = (c1 << 16) | (c2 << 8) | c2;
4003             }
4004         }
4005       c1 &= 0x7F7F7F;
4006       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4007       if (c < 0)
4008         {
4009           MAYBE_FINISH_COMPOSITION ();
4010           for (; src_base < src; src_base++, char_offset++)
4011             {
4012               if (ASCII_BYTE_P (*src_base))
4013                 *charbuf++ = *src_base;
4014               else
4015                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4016             }
4017         }
4018       else if (cmp_status->state == COMPOSING_NO)
4019         {
4020           *charbuf++ = c;
4021           char_offset++;
4022         }
4023       else if ((cmp_status->state == COMPOSING_CHAR
4024                 ? cmp_status->nchars
4025                 : cmp_status->ncomps)
4026                >= MAX_COMPOSITION_COMPONENTS)
4027         {
4028           /* Too long composition.  */
4029           MAYBE_FINISH_COMPOSITION ();
4030           *charbuf++ = c;
4031           char_offset++;
4032         }
4033       else
4034         STORE_COMPOSITION_CHAR (c);
4035       continue;
4036
4037     invalid_code:
4038       MAYBE_FINISH_COMPOSITION ();
4039       src = src_base;
4040       consumed_chars = consumed_chars_base;
4041       ONE_MORE_BYTE (c);
4042       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4043       char_offset++;
4044       coding->errors++;
4045       continue;
4046
4047     break_loop:
4048       break;
4049     }
4050
4051  no_more_source:
4052   if (cmp_status->state != COMPOSING_NO)
4053     {
4054       if (coding->mode & CODING_MODE_LAST_BLOCK)
4055         MAYBE_FINISH_COMPOSITION ();
4056       else
4057         {
4058           charbuf -= cmp_status->length;
4059           for (i = 0; i < cmp_status->length; i++)
4060             cmp_status->carryover[i] = charbuf[i];
4061         }
4062     }
4063   else if (last_id != charset_ascii)
4064     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4065   coding->consumed_char += consumed_chars_base;
4066   coding->consumed = src_base - coding->source;
4067   coding->charbuf_used = charbuf - coding->charbuf;
4068 }
4069
4070
4071 /* ISO2022 encoding stuff.  */
4072
4073 /*
4074    It is not enough to say just "ISO2022" on encoding, we have to
4075    specify more details.  In Emacs, each coding system of ISO2022
4076    variant has the following specifications:
4077         1. Initial designation to G0 thru G3.
4078         2. Allows short-form designation?
4079         3. ASCII should be designated to G0 before control characters?
4080         4. ASCII should be designated to G0 at end of line?
4081         5. 7-bit environment or 8-bit environment?
4082         6. Use locking-shift?
4083         7. Use Single-shift?
4084    And the following two are only for Japanese:
4085         8. Use ASCII in place of JIS0201-1976-Roman?
4086         9. Use JISX0208-1983 in place of JISX0208-1978?
4087    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4088    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4089    details.
4090 */
4091
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advance DST past it, and record the new designation
   in CODING.  A revision prefix (ESC '&' <revision>) comes first when
   CODING has CODING_ISO_FLAG_REVISION set and CHARSET has a
   non-negative revision number.  For a multi-dimension 94-charset
   designated to register 0 whose <final-char> is '@', 'A', or 'B',
   the intermediate byte is omitted (short-form designation) unless
   CODING has CODING_ISO_FLAG_LONG_FORM set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    /* Intermediate bytes for 94- and 96-charsets, indexed by REG.  */  \
    const char *iso_inter_94 = "()*+";                                  \
    const char *iso_inter_96 = ",-./";                                  \
    unsigned char iso_final = CHARSET_ISO_FINAL (charset);              \
    int iso_revision                                                    \
      = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)         \
         ? CHARSET_ISO_REVISION (charset) : -1);                        \
                                                                        \
    if (iso_revision >= 0)                                              \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + iso_revision);                             \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        /* Multi-dimension charsets are introduced by '$'.  */          \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (CHARSET_ISO_CHARS_96 (charset))                             \
          {                                                             \
            EMIT_ONE_ASCII_BYTE (iso_inter_96[reg]);                    \
          }                                                             \
        else if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM) \
                 || reg != 0                                            \
                 || iso_final < '@' || iso_final > 'B')                 \
          {                                                             \
            /* Short form does not apply: spell out the register.  */   \
            EMIT_ONE_ASCII_BYTE (iso_inter_94[reg]);                    \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        int iso_inter = (CHARSET_ISO_CHARS_96 (charset)                 \
                         ? iso_inter_96[reg] : iso_inter_94[reg]);      \
                                                                        \
        EMIT_ONE_ASCII_BYTE (iso_inter);                                \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (iso_final);                                    \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4139
4140
4141 /* The following two macros produce codes (control character or escape
4142    sequence) for ISO2022 single-shift functions (single-shift-2 and
4143    single-shift-3).  */
4144
#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    /* Without the 7-bit flag the one-byte SS2 code is used;            \
       otherwise single-shift-2 is spelled ESC 'N'.  */                 \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS2);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                       \
      }                                                                 \
    /* Tell the character encoders that a single shift is pending.  */  \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4153
4154
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    /* Without the 7-bit flag the one-byte SS3 code is used;            \
       otherwise single-shift-3 is spelled ESC 'O'.  */                 \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                       \
      }                                                                 \
    /* Tell the character encoders that a single shift is pending.  */  \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4163
4164
4165 /* The following four macros produce codes (control character or
4166    escape sequence) for ISO2022 locking-shift functions (shift-in,
4167    shift-out, locking-shift-2, and locking-shift-3).  */
4168
#define ENCODE_SHIFT_IN                                                 \
  do                                                                    \
    {                                                                   \
      /* Emit SI and note that graphic register 0 is now the one        \
         invoked to graphic plane 0.  */                                \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                                \
      CODING_ISO_INVOCATION (coding, 0) = 0;                            \
    }                                                                   \
  while (0)
4174
4175
#define ENCODE_SHIFT_OUT                                                \
  do                                                                    \
    {                                                                   \
      /* Emit SO and note that graphic register 1 is now the one        \
         invoked to graphic plane 0.  */                                \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                                \
      CODING_ISO_INVOCATION (coding, 0) = 1;                            \
    }                                                                   \
  while (0)
4181
4182
#define ENCODE_LOCKING_SHIFT_2                                          \
  do                                                                    \
    {                                                                   \
      /* Emit ESC 'n' and note that graphic register 2 is now the       \
         one invoked to graphic plane 0.  */                            \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');                         \
      CODING_ISO_INVOCATION (coding, 0) = 2;                            \
    }                                                                   \
  while (0)
4188
4189
#define ENCODE_LOCKING_SHIFT_3                                          \
  do {                                                                  \
    /* Locking-shift-3 is ESC 'o'.  ESC 'n' is locking-shift-2, so      \
       emitting it here would make a decoder invoke graphic register    \
       2 while the encoder state below records register 3.  */          \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');                           \
    CODING_ISO_INVOCATION (coding, 0) = 3;                              \
  } while (0)
4195
4196
/* Emit the one-byte position code C1 of a DIMENSION1 character of
   CHARSET, preceded by whatever designation and invocation sequences
   are still needed.  CHARSET must be an lvalue: it is replaced by
   JISX0201-Roman when it is ASCII and CODING_ISO_FLAG_USE_ROMAN is
   set.  C1 may be any integer expression; it is parenthesized at
   every use.  Expands in terms of the caller's CODING, DST and
   PRODUCED_CHARS.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_ascii                                             \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN))     \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift is pending: it covers exactly this            \
           character, so emit it and clear the pending state.  */       \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        /* Invoked to graphic plane 0: 7-bit form.  */                  \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        /* Invoked to graphic plane 1: 8th bit set.  */                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    /* Since CHARSET is not yet invoked to any graphic planes, we       \
       must invoke it, or, at first, designate it to some graphic       \
       register.  Then repeat the loop to actually produce the          \
       character.  */                                                   \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
4239
4240
/* Emit the two-byte position code C1 C2 of a DIMENSION2 character of
   CHARSET, preceded by whatever designation and invocation codes are
   still needed.  CHARSET must be an lvalue: it is replaced by
   JISX0208-1978 when it is JISX0208 and CODING_ISO_FLAG_USE_OLDJIS
   is set.  Expands in terms of the caller's CODING, DST and
   PRODUCED_CHARS.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A single shift is pending: it covers exactly this            \
           character, so emit it and clear the pending state.  */       \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        else                                                            \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        /* Invoked to graphic plane 0: 7-bit form.  */                  \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        /* Invoked to graphic plane 1: 8th bit set.  */                 \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to either graphic plane yet.  Produce     \
       the invocation (and, if needed, designation) codes, then go      \
       around the loop again to emit the character itself.  */          \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
4283
4284
#define ENCODE_ISO_CHARACTER(charset, c)                                \
  do {                                                                  \
    /* Code point of C in CHARSET, as computed by ENCODE_CHAR.  */      \
    int code = ENCODE_CHAR ((charset), (c));                            \
                                                                        \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        /* Anything that is not DIMENSION1 is emitted as two bytes:     \
           the bits above the low 8, then the low 8 bits.  */           \
        ENCODE_ISO_CHARACTER_DIMENSION2 ((charset),                     \
                                         code >> 8, code & 0xFF);       \
      }                                                                 \
    else                                                                \
      {                                                                 \
        ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);              \
      }                                                                 \
  } while (0)
4294
4295
4296 /* Produce designation and invocation codes at a place pointed by DST
4297    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4298    Return new DST.  */
4299
4300 unsigned char *
4301 encode_invocation_designation (struct charset *charset,
4302                                struct coding_system *coding,
4303                                unsigned char *dst, int *p_nchars)
4304 {
4305   int multibytep = coding->dst_multibyte;
4306   int produced_chars = *p_nchars;
4307   int reg;                      /* graphic register number */
4308   int id = CHARSET_ID (charset);
4309
4310   /* At first, check designations.  */
4311   for (reg = 0; reg < 4; reg++)
4312     if (id == CODING_ISO_DESIGNATION (coding, reg))
4313       break;
4314
4315   if (reg >= 4)
4316     {
4317       /* CHARSET is not yet designated to any graphic registers.  */
4318       /* At first check the requested designation.  */
4319       reg = CODING_ISO_REQUEST (coding, id);
4320       if (reg < 0)
4321         /* Since CHARSET requests no special designation, designate it
4322            to graphic register 0.  */
4323         reg = 0;
4324
4325       ENCODE_DESIGNATION (charset, reg, coding);
4326     }
4327
4328   if (CODING_ISO_INVOCATION (coding, 0) != reg
4329       && CODING_ISO_INVOCATION (coding, 1) != reg)
4330     {
4331       /* Since the graphic register REG is not invoked to any graphic
4332          planes, invoke it to graphic plane 0.  */
4333       switch (reg)
4334         {
4335         case 0:                 /* graphic register 0 */
4336           ENCODE_SHIFT_IN;
4337           break;
4338
4339         case 1:                 /* graphic register 1 */
4340           ENCODE_SHIFT_OUT;
4341           break;
4342
4343         case 2:                 /* graphic register 2 */
4344           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4345             ENCODE_SINGLE_SHIFT_2;
4346           else
4347             ENCODE_LOCKING_SHIFT_2;
4348           break;
4349
4350         case 3:                 /* graphic register 3 */
4351           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4352             ENCODE_SINGLE_SHIFT_3;
4353           else
4354             ENCODE_LOCKING_SHIFT_3;
4355           break;
4356         }
4357     }
4358
4359   *p_nchars = produced_chars;
4360   return dst;
4361 }
4362
4363 /* The following three macros produce codes for indicating direction
4364    of text.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    /* CODING_ISO_FLAGS is a set of flag bits, so the 7-bit flag must   \
       be tested with `&'.  Comparing the whole word with `==' would    \
       pick the 7-bit form only when no other flag is set.  */          \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
4372
4373
#define ENCODE_DIRECTION_R2L()                                          \
  do {                                                                  \
    /* ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro;      \
       giving it an argument list would leave a stray `(dst)' after     \
       its `while (0)' and fail to compile.  */                         \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                                 \
    /* '2' ']' starts right-to-left text.  */                           \
    EMIT_TWO_ASCII_BYTES ('2', ']');                                    \
  } while (0)
4379
4380
#define ENCODE_DIRECTION_L2R()                                          \
  do {                                                                  \
    /* ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro;      \
       giving it an argument list would leave a stray `(dst)' after     \
       its `while (0)' and fail to compile.  */                         \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                                 \
    /* '0' ']' ends the current direction, i.e. back to                 \
       left-to-right.  */                                               \
    EMIT_TWO_ASCII_BYTES ('0', ']');                                    \
  } while (0)
4386
4387
/* Bring the graphic planes and registers back to their initial
   state: shift in if graphic plane 0 does not currently hold graphic
   register 0, then re-designate every register that has an initial
   charset but is currently designated to something else.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int reg;                                                            \
    struct charset *initial;                                            \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      {                                                                 \
        ENCODE_SHIFT_IN;                                                \
      }                                                                 \
    for (reg = 0; reg < 4; reg++)                                       \
      {                                                                 \
        /* A negative initial charset means this register has no        \
           initial designation to restore.  */                          \
        if (CODING_ISO_INITIAL (coding, reg) < 0)                       \
          continue;                                                     \
        if (CODING_ISO_DESIGNATION (coding, reg)                        \
            == CODING_ISO_INITIAL (coding, reg))                        \
          continue;                                                     \
        initial = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));   \
        ENCODE_DESIGNATION (initial, reg, coding);                      \
      }                                                                 \
  } while (0)
4406
4407
4408 /* Produce designation sequences of charsets in the line started from
4409    SRC to a place pointed by DST, and return updated DST.
4410
4411    If the current block ends before any end-of-line, we may fail to
4412    find all the necessary designations.  */
4413
4414 static unsigned char *
4415 encode_designation_at_bol (struct coding_system *coding, int *charbuf,
4416                            int *charbuf_end, unsigned char *dst)
4417 {
4418   struct charset *charset;
4419   /* Table of charsets to be designated to each graphic register.  */
4420   int r[4];
4421   int c, found = 0, reg;
4422   int produced_chars = 0;
4423   int multibytep = coding->dst_multibyte;
4424   Lisp_Object attrs;
4425   Lisp_Object charset_list;
4426
4427   attrs = CODING_ID_ATTRS (coding->id);
4428   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4429   if (EQ (charset_list, Qiso_2022))
4430     charset_list = Viso_2022_charset_list;
4431
4432   for (reg = 0; reg < 4; reg++)
4433     r[reg] = -1;
4434
4435   while (found < 4)
4436     {
4437       int id;
4438
4439       c = *charbuf++;
4440       if (c == '\n')
4441         break;
4442       charset = char_charset (c, charset_list, NULL);
4443       id = CHARSET_ID (charset);
4444       reg = CODING_ISO_REQUEST (coding, id);
4445       if (reg >= 0 && r[reg] < 0)
4446         {
4447           found++;
4448           r[reg] = id;
4449         }
4450     }
4451
4452   if (found)
4453     {
4454       for (reg = 0; reg < 4; reg++)
4455         if (r[reg] >= 0
4456             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4457           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4458     }
4459
4460   return dst;
4461 }
4462
4463 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4464
4465 static int
4466 encode_coding_iso_2022 (struct coding_system *coding)
4467 {
4468   int multibytep = coding->dst_multibyte;
4469   int *charbuf = coding->charbuf;
4470   int *charbuf_end = charbuf + coding->charbuf_used;
4471   unsigned char *dst = coding->destination + coding->produced;
4472   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4473   int safe_room = 16;
4474   int bol_designation
4475     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4476        && CODING_ISO_BOL (coding));
4477   int produced_chars = 0;
4478   Lisp_Object attrs, eol_type, charset_list;
4479   int ascii_compatible;
4480   int c;
4481   int preferred_charset_id = -1;
4482
4483   CODING_GET_INFO (coding, attrs, charset_list);
4484   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4485   if (VECTORP (eol_type))
4486     eol_type = Qunix;
4487
4488   setup_iso_safe_charsets (attrs);
4489   /* Charset list may have been changed.  */
4490   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4491   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4492
4493   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4494
4495   while (charbuf < charbuf_end)
4496     {
4497       ASSURE_DESTINATION (safe_room);
4498
4499       if (bol_designation)
4500         {
4501           unsigned char *dst_prev = dst;
4502
4503           /* We have to produce designation sequences if any now.  */
4504           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4505           bol_designation = 0;
4506           /* We are sure that designation sequences are all ASCII bytes.  */
4507           produced_chars += dst - dst_prev;
4508         }
4509
4510       c = *charbuf++;
4511
4512       if (c < 0)
4513         {
4514           /* Handle an annotation.  */
4515           switch (*charbuf)
4516             {
4517             case CODING_ANNOTATE_COMPOSITION_MASK:
4518               /* Not yet implemented.  */
4519               break;
4520             case CODING_ANNOTATE_CHARSET_MASK:
4521               preferred_charset_id = charbuf[2];
4522               if (preferred_charset_id >= 0
4523                   && NILP (Fmemq (make_number (preferred_charset_id),
4524                                   charset_list)))
4525                 preferred_charset_id = -1;
4526               break;
4527             default:
4528               abort ();
4529             }
4530           charbuf += -c - 1;
4531           continue;
4532         }
4533
4534       /* Now encode the character C.  */
4535       if (c < 0x20 || c == 0x7F)
4536         {
4537           if (c == '\n'
4538               || (c == '\r' && EQ (eol_type, Qmac)))
4539             {
4540               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4541                 ENCODE_RESET_PLANE_AND_REGISTER ();
4542               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4543                 {
4544                   int i;
4545
4546                   for (i = 0; i < 4; i++)
4547                     CODING_ISO_DESIGNATION (coding, i)
4548                       = CODING_ISO_INITIAL (coding, i);
4549                 }
4550               bol_designation
4551                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4552             }
4553           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4554             ENCODE_RESET_PLANE_AND_REGISTER ();
4555           EMIT_ONE_ASCII_BYTE (c);
4556         }
4557       else if (ASCII_CHAR_P (c))
4558         {
4559           if (ascii_compatible)
4560             EMIT_ONE_ASCII_BYTE (c);
4561           else
4562             {
4563               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4564               ENCODE_ISO_CHARACTER (charset, c);
4565             }
4566         }
4567       else if (CHAR_BYTE8_P (c))
4568         {
4569           c = CHAR_TO_BYTE8 (c);
4570           EMIT_ONE_BYTE (c);
4571         }
4572       else
4573         {
4574           struct charset *charset;
4575
4576           if (preferred_charset_id >= 0)
4577             {
4578               charset = CHARSET_FROM_ID (preferred_charset_id);
4579               if (! CHAR_CHARSET_P (c, charset))
4580                 charset = char_charset (c, charset_list, NULL);
4581             }
4582           else
4583             charset = char_charset (c, charset_list, NULL);
4584           if (!charset)
4585             {
4586               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4587                 {
4588                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4589                   charset = CHARSET_FROM_ID (charset_ascii);
4590                 }
4591               else
4592                 {
4593                   c = coding->default_char;
4594                   charset = char_charset (c, charset_list, NULL);
4595                 }
4596             }
4597           ENCODE_ISO_CHARACTER (charset, c);
4598         }
4599     }
4600
4601   if (coding->mode & CODING_MODE_LAST_BLOCK
4602       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4603     {
4604       ASSURE_DESTINATION (safe_room);
4605       ENCODE_RESET_PLANE_AND_REGISTER ();
4606     }
4607   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4608   CODING_ISO_BOL (coding) = bol_designation;
4609   coding->produced_char += produced_chars;
4610   coding->produced = dst - coding->destination;
4611   return 0;
4612 }
4613
4614 \f
4615 /*** 8,9. SJIS and BIG5 handlers ***/
4616
/* Although SJIS and BIG5 are not ISO coding systems, they are used
   quite widely.  So, for the moment, Emacs supports them in bare C
   code.  But, in the future, they may be supported only by CCL.  */
4620
/* SJIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of charset katakana-jisx0201 is encoded by
   "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, with its two position-codes divided and
   shifted so that they fit in the ranges below.
4627
4628    --- CODE RANGE of SJIS ---
4629    (character set)      (range)
4630    ASCII                0x00 .. 0x7F
4631    KATAKANA-JISX0201    0xA0 .. 0xDF
4632    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4633             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4634    -------------------------------
4635
4636 */
4637
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and each of its characters is encoded in two bytes.
4641
4642    --- CODE RANGE of BIG5 ---
4643    (character set)      (range)
4644    ASCII                0x00 .. 0x7F
4645    Big5 (1st byte)      0xA1 .. 0xFE
4646         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4647    --------------------------
4648
4649   */
4650
4651 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4652    Check if a text is encoded in SJIS.  If it is, return
4653    CATEGORY_MASK_SJIS, else return 0.  */
4654
4655 static int
4656 detect_coding_sjis (struct coding_system *coding,
4657                     struct coding_detection_info *detect_info)
4658 {
4659   const unsigned char *src = coding->source, *src_base;
4660   const unsigned char *src_end = coding->source + coding->src_bytes;
4661   int multibytep = coding->src_multibyte;
4662   int consumed_chars = 0;
4663   int found = 0;
4664   int c;
4665   Lisp_Object attrs, charset_list;
4666   int max_first_byte_of_2_byte_code;
4667
4668   CODING_GET_INFO (coding, attrs, charset_list);
4669   max_first_byte_of_2_byte_code
4670     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4671
4672   detect_info->checked |= CATEGORY_MASK_SJIS;
4673   /* A coding system of this category is always ASCII compatible.  */
4674   src += coding->head_ascii;
4675
4676   while (1)
4677     {
4678       src_base = src;
4679       ONE_MORE_BYTE (c);
4680       if (c < 0x80)
4681         continue;
4682       if ((c >= 0x81 && c <= 0x9F)
4683           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4684         {
4685           ONE_MORE_BYTE (c);
4686           if (c < 0x40 || c == 0x7F || c > 0xFC)
4687             break;
4688           found = CATEGORY_MASK_SJIS;
4689         }
4690       else if (c >= 0xA0 && c < 0xE0)
4691         found = CATEGORY_MASK_SJIS;
4692       else
4693         break;
4694     }
4695   detect_info->rejected |= CATEGORY_MASK_SJIS;
4696   return 0;
4697
4698  no_more_source:
4699   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4700     {
4701       detect_info->rejected |= CATEGORY_MASK_SJIS;
4702       return 0;
4703     }
4704   detect_info->found |= found;
4705   return 1;
4706 }
4707
4708 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4709    Check if a text is encoded in BIG5.  If it is, return
4710    CATEGORY_MASK_BIG5, else return 0.  */
4711
4712 static int
4713 detect_coding_big5 (struct coding_system *coding,
4714                     struct coding_detection_info *detect_info)
4715 {
4716   const unsigned char *src = coding->source, *src_base;
4717   const unsigned char *src_end = coding->source + coding->src_bytes;
4718   int multibytep = coding->src_multibyte;
4719   int consumed_chars = 0;
4720   int found = 0;
4721   int c;
4722
4723   detect_info->checked |= CATEGORY_MASK_BIG5;
4724   /* A coding system of this category is always ASCII compatible.  */
4725   src += coding->head_ascii;
4726
4727   while (1)
4728     {
4729       src_base = src;
4730       ONE_MORE_BYTE (c);
4731       if (c < 0x80)
4732         continue;
4733       if (c >= 0xA1)
4734         {
4735           ONE_MORE_BYTE (c);
4736           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4737             return 0;
4738           found = CATEGORY_MASK_BIG5;
4739         }
4740       else
4741         break;
4742     }
4743   detect_info->rejected |= CATEGORY_MASK_BIG5;
4744   return 0;
4745
4746  no_more_source:
4747   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4748     {
4749       detect_info->rejected |= CATEGORY_MASK_BIG5;
4750       return 0;
4751     }
4752   detect_info->found |= found;
4753   return 1;
4754 }
4755
4756 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4757    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4758
4759 static void
4760 decode_coding_sjis (struct coding_system *coding)
4761 {
4762   const unsigned char *src = coding->source + coding->consumed;
4763   const unsigned char *src_end = coding->source + coding->src_bytes;
4764   const unsigned char *src_base;
4765   int *charbuf = coding->charbuf + coding->charbuf_used;
4766   /* We may produce one charset annocation in one loop and one more at
4767      the end.  */
4768   int *charbuf_end
4769     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4770   int consumed_chars = 0, consumed_chars_base;
4771   int multibytep = coding->src_multibyte;
4772   struct charset *charset_roman, *charset_kanji, *charset_kana;
4773   struct charset *charset_kanji2;
4774   Lisp_Object attrs, charset_list, val;
4775   int char_offset = coding->produced_char;
4776   int last_offset = char_offset;
4777   int last_id = charset_ascii;
4778   int eol_crlf =
4779     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4780   int byte_after_cr = -1;
4781
4782   CODING_GET_INFO (coding, attrs, charset_list);
4783
4784   val = charset_list;
4785   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4786   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4787   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4788   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4789
4790   while (1)
4791     {
4792       int c, c1;
4793       struct charset *charset;
4794
4795       src_base = src;
4796       consumed_chars_base = consumed_chars;
4797
4798       if (charbuf >= charbuf_end)
4799         {
4800           if (byte_after_cr >= 0)
4801             src_base--;
4802           break;
4803         }
4804
4805       if (byte_after_cr >= 0)
4806         c = byte_after_cr, byte_after_cr = -1;
4807       else
4808         ONE_MORE_BYTE (c);
4809       if (c < 0)
4810         goto invalid_code;
4811       if (c < 0x80)
4812         {
4813           if (eol_crlf && c == '\r')
4814             ONE_MORE_BYTE (byte_after_cr);
4815           charset = charset_roman;
4816         }
4817       else if (c == 0x80 || c == 0xA0)
4818         goto invalid_code;
4819       else if (c >= 0xA1 && c <= 0xDF)
4820         {
4821           /* SJIS -> JISX0201-Kana */
4822           c &= 0x7F;
4823           charset = charset_kana;
4824         }
4825       else if (c <= 0xEF)
4826         {
4827           /* SJIS -> JISX0208 */
4828           ONE_MORE_BYTE (c1);
4829           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4830             goto invalid_code;
4831           c = (c << 8) | c1;
4832           SJIS_TO_JIS (c);
4833           charset = charset_kanji;
4834         }
4835       else if (c <= 0xFC && charset_kanji2)
4836         {
4837           /* SJIS -> JISX0213-2 */
4838           ONE_MORE_BYTE (c1);
4839           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4840             goto invalid_code;
4841           c = (c << 8) | c1;
4842           SJIS_TO_JIS2 (c);
4843           charset = charset_kanji2;
4844         }
4845       else
4846         goto invalid_code;
4847       if (charset->id != charset_ascii
4848           && last_id != charset->id)
4849         {
4850           if (last_id != charset_ascii)
4851             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4852           last_id = charset->id;
4853           last_offset = char_offset;
4854         }
4855       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4856       *charbuf++ = c;
4857       char_offset++;
4858       continue;
4859
4860     invalid_code:
4861       src = src_base;
4862       consumed_chars = consumed_chars_base;
4863       ONE_MORE_BYTE (c);
4864       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4865       char_offset++;
4866       coding->errors++;
4867     }
4868
4869  no_more_source:
4870   if (last_id != charset_ascii)
4871     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4872   coding->consumed_char += consumed_chars_base;
4873   coding->consumed = src_base - coding->source;
4874   coding->charbuf_used = charbuf - coding->charbuf;
4875 }
4876
4877 static void
4878 decode_coding_big5 (struct coding_system *coding)
4879 {
4880   const unsigned char *src = coding->source + coding->consumed;
4881   const unsigned char *src_end = coding->source + coding->src_bytes;
4882   const unsigned char *src_base;
4883   int *charbuf = coding->charbuf + coding->charbuf_used;
4884   /* We may produce one charset annocation in one loop and one more at
4885      the end.  */
4886   int *charbuf_end
4887     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4888   int consumed_chars = 0, consumed_chars_base;
4889   int multibytep = coding->src_multibyte;
4890   struct charset *charset_roman, *charset_big5;
4891   Lisp_Object attrs, charset_list, val;
4892   int char_offset = coding->produced_char;
4893   int last_offset = char_offset;
4894   int last_id = charset_ascii;
4895   int eol_crlf =
4896     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4897   int byte_after_cr = -1;
4898
4899   CODING_GET_INFO (coding, attrs, charset_list);
4900   val = charset_list;
4901   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4902   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4903
4904   while (1)
4905     {
4906       int c, c1;
4907       struct charset *charset;
4908
4909       src_base = src;
4910       consumed_chars_base = consumed_chars;
4911
4912       if (charbuf >= charbuf_end)
4913         {
4914           if (byte_after_cr >= 0)
4915             src_base--;
4916           break;
4917         }
4918
4919       if (byte_after_cr >= 0)
4920         c = byte_after_cr, byte_after_cr = -1;
4921       else
4922         ONE_MORE_BYTE (c);
4923
4924       if (c < 0)
4925         goto invalid_code;
4926       if (c < 0x80)
4927         {
4928           if (eol_crlf && c == '\r')
4929             ONE_MORE_BYTE (byte_after_cr);
4930           charset = charset_roman;
4931         }
4932       else
4933         {
4934           /* BIG5 -> Big5 */
4935           if (c < 0xA1 || c > 0xFE)
4936             goto invalid_code;
4937           ONE_MORE_BYTE (c1);
4938           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4939             goto invalid_code;
4940           c = c << 8 | c1;
4941           charset = charset_big5;
4942         }
4943       if (charset->id != charset_ascii
4944           && last_id != charset->id)
4945         {
4946           if (last_id != charset_ascii)
4947             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4948           last_id = charset->id;
4949           last_offset = char_offset;
4950         }
4951       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4952       *charbuf++ = c;
4953       char_offset++;
4954       continue;
4955
4956     invalid_code:
4957       src = src_base;
4958       consumed_chars = consumed_chars_base;
4959       ONE_MORE_BYTE (c);
4960       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4961       char_offset++;
4962       coding->errors++;
4963     }
4964
4965  no_more_source:
4966   if (last_id != charset_ascii)
4967     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4968   coding->consumed_char += consumed_chars_base;
4969   coding->consumed = src_base - coding->source;
4970   coding->charbuf_used = charbuf - coding->charbuf;
4971 }
4972
4973 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4974    This function can encode charsets `ascii', `katakana-jisx0201',
4975    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4976    are sure that all these charsets are registered as official charset
4977    (i.e. do not have extended leading-codes).  Characters of other
4978    charsets are produced without any encoding.  If SJIS_P is 1, encode
4979    SJIS text, else encode BIG5 text.  */
4980
4981 static int
4982 encode_coding_sjis (struct coding_system *coding)
4983 {
4984   int multibytep = coding->dst_multibyte;
4985   int *charbuf = coding->charbuf;
4986   int *charbuf_end = charbuf + coding->charbuf_used;
4987   unsigned char *dst = coding->destination + coding->produced;
4988   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4989   int safe_room = 4;
4990   int produced_chars = 0;
4991   Lisp_Object attrs, charset_list, val;
4992   int ascii_compatible;
4993   struct charset *charset_roman, *charset_kanji, *charset_kana;
4994   struct charset *charset_kanji2;
4995   int c;
4996
4997   CODING_GET_INFO (coding, attrs, charset_list);
4998   val = charset_list;
4999   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5000   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5001   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5002   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
5003
5004   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5005
5006   while (charbuf < charbuf_end)
5007     {
5008       ASSURE_DESTINATION (safe_room);
5009       c = *charbuf++;
5010       /* Now encode the character C.  */
5011       if (ASCII_CHAR_P (c) && ascii_compatible)
5012         EMIT_ONE_ASCII_BYTE (c);
5013       else if (CHAR_BYTE8_P (c))
5014         {
5015           c = CHAR_TO_BYTE8 (c);
5016           EMIT_ONE_BYTE (c);
5017         }
5018       else
5019         {
5020           unsigned code;
5021           struct charset *charset = char_charset (c, charset_list, &code);
5022
5023           if (!charset)
5024             {
5025               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5026                 {
5027                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5028                   charset = CHARSET_FROM_ID (charset_ascii);
5029                 }
5030               else
5031                 {
5032                   c = coding->default_char;
5033                   charset = char_charset (c, charset_list, &code);
5034                 }
5035             }
5036           if (code == CHARSET_INVALID_CODE (charset))
5037             abort ();
5038           if (charset == charset_kanji)
5039             {
5040               int c1, c2;
5041               JIS_TO_SJIS (code);
5042               c1 = code >> 8, c2 = code & 0xFF;
5043               EMIT_TWO_BYTES (c1, c2);
5044             }
5045           else if (charset == charset_kana)
5046             EMIT_ONE_BYTE (code | 0x80);
5047           else if (charset_kanji2 && charset == charset_kanji2)
5048             {
5049               int c1, c2;
5050
5051               c1 = code >> 8;
5052               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5053                   || c1 == 0x28
5054                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5055                 {
5056                   JIS_TO_SJIS2 (code);
5057                   c1 = code >> 8, c2 = code & 0xFF;
5058                   EMIT_TWO_BYTES (c1, c2);
5059                 }
5060               else
5061                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5062             }
5063           else
5064             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5065         }
5066     }
5067   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5068   coding->produced_char += produced_chars;
5069   coding->produced = dst - coding->destination;
5070   return 0;
5071 }
5072
5073 static int
5074 encode_coding_big5 (struct coding_system *coding)
5075 {
5076   int multibytep = coding->dst_multibyte;
5077   int *charbuf = coding->charbuf;
5078   int *charbuf_end = charbuf + coding->charbuf_used;
5079   unsigned char *dst = coding->destination + coding->produced;
5080   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5081   int safe_room = 4;
5082   int produced_chars = 0;
5083   Lisp_Object attrs, charset_list, val;
5084   int ascii_compatible;
5085   struct charset *charset_roman, *charset_big5;
5086   int c;
5087
5088   CODING_GET_INFO (coding, attrs, charset_list);
5089   val = charset_list;
5090   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5091   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5092   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5093
5094   while (charbuf < charbuf_end)
5095     {
5096       ASSURE_DESTINATION (safe_room);
5097       c = *charbuf++;
5098       /* Now encode the character C.  */
5099       if (ASCII_CHAR_P (c) && ascii_compatible)
5100         EMIT_ONE_ASCII_BYTE (c);
5101       else if (CHAR_BYTE8_P (c))
5102         {
5103           c = CHAR_TO_BYTE8 (c);
5104           EMIT_ONE_BYTE (c);
5105         }
5106       else
5107         {
5108           unsigned code;
5109           struct charset *charset = char_charset (c, charset_list, &code);
5110
5111           if (! charset)
5112             {
5113               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5114                 {
5115                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5116                   charset = CHARSET_FROM_ID (charset_ascii);
5117                 }
5118               else
5119                 {
5120                   c = coding->default_char;
5121                   charset = char_charset (c, charset_list, &code);
5122                 }
5123             }
5124           if (code == CHARSET_INVALID_CODE (charset))
5125             abort ();
5126           if (charset == charset_big5)
5127             {
5128               int c1, c2;
5129
5130               c1 = code >> 8, c2 = code & 0xFF;
5131               EMIT_TWO_BYTES (c1, c2);
5132             }
5133           else
5134             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5135         }
5136     }
5137   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5138   coding->produced_char += produced_chars;
5139   coding->produced = dst - coding->destination;
5140   return 0;
5141 }
5142
5143 \f
5144 /*** 10. CCL handlers ***/
5145
5146 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5147    Check if a text is encoded in a coding system of which
5148    encoder/decoder are written in CCL program.  If it is, return
5149    CATEGORY_MASK_CCL, else return 0.  */
5150
5151 static int
5152 detect_coding_ccl (struct coding_system *coding,
5153                    struct coding_detection_info *detect_info)
5154 {
5155   const unsigned char *src = coding->source, *src_base;
5156   const unsigned char *src_end = coding->source + coding->src_bytes;
5157   int multibytep = coding->src_multibyte;
5158   int consumed_chars = 0;
5159   int found = 0;
5160   unsigned char *valids;
5161   int head_ascii = coding->head_ascii;
5162   Lisp_Object attrs;
5163
5164   detect_info->checked |= CATEGORY_MASK_CCL;
5165
5166   coding = &coding_categories[coding_category_ccl];
5167   valids = CODING_CCL_VALIDS (coding);
5168   attrs = CODING_ID_ATTRS (coding->id);
5169   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5170     src += head_ascii;
5171
5172   while (1)
5173     {
5174       int c;
5175
5176       src_base = src;
5177       ONE_MORE_BYTE (c);
5178       if (c < 0 || ! valids[c])
5179         break;
5180       if ((valids[c] > 1))
5181         found = CATEGORY_MASK_CCL;
5182     }
5183   detect_info->rejected |= CATEGORY_MASK_CCL;
5184   return 0;
5185
5186  no_more_source:
5187   detect_info->found |= found;
5188   return 1;
5189 }
5190
5191 static void
5192 decode_coding_ccl (struct coding_system *coding)
5193 {
5194   const unsigned char *src = coding->source + coding->consumed;
5195   const unsigned char *src_end = coding->source + coding->src_bytes;
5196   int *charbuf = coding->charbuf + coding->charbuf_used;
5197   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5198   int consumed_chars = 0;
5199   int multibytep = coding->src_multibyte;
5200   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5201   int source_charbuf[1024];
5202   int source_byteidx[1025];
5203   Lisp_Object attrs, charset_list;
5204
5205   CODING_GET_INFO (coding, attrs, charset_list);
5206
5207   while (1)
5208     {
5209       const unsigned char *p = src;
5210       int i = 0;
5211
5212       if (multibytep)
5213         {
5214           while (i < 1024 && p < src_end)
5215             {
5216               source_byteidx[i] = p - src;
5217               source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5218             }
5219           source_byteidx[i] = p - src;
5220         }
5221       else
5222         while (i < 1024 && p < src_end)
5223           source_charbuf[i++] = *p++;
5224
5225       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5226         ccl->last_block = 1;
5227       ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5228                   charset_list);
5229       charbuf += ccl->produced;
5230       if (multibytep)
5231         src += source_byteidx[ccl->consumed];
5232       else
5233         src += ccl->consumed;
5234       consumed_chars += ccl->consumed;
5235       if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5236         break;
5237     }
5238
5239   switch (ccl->status)
5240     {
5241     case CCL_STAT_SUSPEND_BY_SRC:
5242       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5243       break;
5244     case CCL_STAT_SUSPEND_BY_DST:
5245       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5246       break;
5247     case CCL_STAT_QUIT:
5248     case CCL_STAT_INVALID_CMD:
5249       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5250       break;
5251     default:
5252       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5253       break;
5254     }
5255   coding->consumed_char += consumed_chars;
5256   coding->consumed = src - coding->source;
5257   coding->charbuf_used = charbuf - coding->charbuf;
5258 }
5259
5260 static int
5261 encode_coding_ccl (struct coding_system *coding)
5262 {
5263   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5264   int multibytep = coding->dst_multibyte;
5265   int *charbuf = coding->charbuf;
5266   int *charbuf_end = charbuf + coding->charbuf_used;
5267   unsigned char *dst = coding->destination + coding->produced;
5268   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5269   int destination_charbuf[1024];
5270   int i, produced_chars = 0;
5271   Lisp_Object attrs, charset_list;
5272
5273   CODING_GET_INFO (coding, attrs, charset_list);
5274   if (coding->consumed_char == coding->src_chars
5275       && coding->mode & CODING_MODE_LAST_BLOCK)
5276     ccl->last_block = 1;
5277
5278   while (charbuf < charbuf_end)
5279     {
5280       ccl_driver (ccl, charbuf, destination_charbuf,
5281                   charbuf_end - charbuf, 1024, charset_list);
5282       if (multibytep)
5283         {
5284           ASSURE_DESTINATION (ccl->produced * 2);
5285           for (i = 0; i < ccl->produced; i++)
5286             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5287         }
5288       else
5289         {
5290           ASSURE_DESTINATION (ccl->produced);
5291           for (i = 0; i < ccl->produced; i++)
5292             *dst++ = destination_charbuf[i] & 0xFF;
5293           produced_chars += ccl->produced;
5294         }
5295       charbuf += ccl->consumed;
5296       if (ccl->status == CCL_STAT_QUIT
5297           || ccl->status == CCL_STAT_INVALID_CMD)
5298         break;
5299     }
5300
5301   switch (ccl->status)
5302     {
5303     case CCL_STAT_SUSPEND_BY_SRC:
5304       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5305       break;
5306     case CCL_STAT_SUSPEND_BY_DST:
5307       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5308       break;
5309     case CCL_STAT_QUIT:
5310     case CCL_STAT_INVALID_CMD:
5311       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5312       break;
5313     default:
5314       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5315       break;
5316     }
5317
5318   coding->produced_char += produced_chars;
5319   coding->produced = dst - coding->destination;
5320   return 0;
5321 }
5322
5323
5324 \f
5325 /*** 10, 11. no-conversion handlers ***/
5326
5327 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
5328
5329 static void
5330 decode_coding_raw_text (struct coding_system *coding)
5331 {
5332   int eol_crlf =
5333     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5334
5335   coding->chars_at_source = 1;
5336   coding->consumed_char = coding->src_chars;
5337   coding->consumed = coding->src_bytes;
5338   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
5339     {
5340       coding->consumed_char--;
5341       coding->consumed--;
5342       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5343     }
5344   else
5345     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5346 }
5347
/* Encode the code points held in CODING->charbuf as raw text and
   store the result starting at CODING->destination + CODING->produced.

   Four cases are handled, selected by CODING->dst_multibyte and
   CODING->src_multibyte.  On exit CODING->produced_char and
   CODING->produced are updated, CODING_RESULT_SUCCESS is recorded,
   and 0 is returned.

   NOTE(review): ASSURE_DESTINATION and the EMIT_* macros are defined
   elsewhere in this file; they presumably rely on the local names
   `coding', `charbuf', `charbuf_end', `dst', `dst_end',
   `multibytep' and `produced_chars', so those names must be kept --
   confirm against the macro definitions.  */
static int
encode_coding_raw_text (struct coding_system *coding)
{
  int multibytep = coding->dst_multibyte;
  int *charbuf = coding->charbuf;
  int *charbuf_end = coding->charbuf + coding->charbuf_used;
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  int produced_chars = 0;
  int c;

  if (multibytep)
    {
      /* Multibyte destination.  PRODUCED_CHARS is not touched
         explicitly in this branch; presumably the EMIT_* macros
         maintain it.  */
      int safe_room = MAX_MULTIBYTE_LENGTH * 2;

      if (coding->src_multibyte)
        while (charbuf < charbuf_end)
          {
            ASSURE_DESTINATION (safe_room);
            c = *charbuf++;
            if (ASCII_CHAR_P (c))
              EMIT_ONE_ASCII_BYTE (c);
            else if (CHAR_BYTE8_P (c))
              {
                /* A raw 8-bit byte: emit the byte itself.  */
                c = CHAR_TO_BYTE8 (c);
                EMIT_ONE_BYTE (c);
              }
            else
              {
                /* Any other character: build its multibyte form in
                   STR and emit that form byte by byte.  */
                unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;

                CHAR_STRING_ADVANCE (c, p1);
                while (p0 < p1)
                  {
                    EMIT_ONE_BYTE (*p0);
                    p0++;
                  }
              }
          }
      else
        /* Unibyte source: every element of CHARBUF is emitted as one
           byte.  */
        while (charbuf < charbuf_end)
          {
            ASSURE_DESTINATION (safe_room);
            c = *charbuf++;
            EMIT_ONE_BYTE (c);
          }
    }
  else
    {
      /* Unibyte destination: bytes are stored directly through DST
         and the produced count is derived from DST afterwards.  */
      if (coding->src_multibyte)
        {
          int safe_room = MAX_MULTIBYTE_LENGTH;

          while (charbuf < charbuf_end)
            {
              ASSURE_DESTINATION (safe_room);
              c = *charbuf++;
              if (ASCII_CHAR_P (c))
                *dst++ = c;
              else if (CHAR_BYTE8_P (c))
                *dst++ = CHAR_TO_BYTE8 (c);
              else
                CHAR_STRING_ADVANCE (c, dst);
            }
        }
      else
        {
          /* Unibyte to unibyte: request room for everything at once,
             then copy, still bounded by DST_END.  */
          ASSURE_DESTINATION (charbuf_end - charbuf);
          while (charbuf < charbuf_end && dst < dst_end)
            *dst++ = *charbuf++;
        }
      /* Here the number of produced characters equals the number of
         bytes written by this call.  */
      produced_chars = dst - (coding->destination + coding->produced);
    }
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->produced_char += produced_chars;
  coding->produced = dst - coding->destination;
  return 0;
}
5426
5427 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5428    Check if a text is encoded in a charset-based coding system.  If it
5429    is, return 1, else return 0.  */
5430
5431 static int
5432 detect_coding_charset (struct coding_system *coding,
5433                        struct coding_detection_info *detect_info)
5434 {
5435   const unsigned char *src = coding->source, *src_base;
5436   const unsigned char *src_end = coding->source + coding->src_bytes;
5437   int multibytep = coding->src_multibyte;
5438   int consumed_chars = 0;
5439   Lisp_Object attrs, valids, name;
5440   int found = 0;
5441   int head_ascii = coding->head_ascii;
5442   int check_latin_extra = 0;
5443
5444   detect_info->checked |= CATEGORY_MASK_CHARSET;
5445
5446   coding = &coding_categories[coding_category_charset];
5447   attrs = CODING_ID_ATTRS (coding->id);
5448   valids = AREF (attrs, coding_attr_charset_valids);
5449   name = CODING_ID_NAME (coding->id);
5450   if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5451                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5452       || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5453                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5454     check_latin_extra = 1;
5455
5456   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5457     src += head_ascii;
5458
5459   while (1)
5460     {
5461       int c;
5462       Lisp_Object val;
5463       struct charset *charset;
5464       int dim, idx;
5465
5466       src_base = src;
5467       ONE_MORE_BYTE (c);
5468       if (c < 0)
5469         continue;
5470       val = AREF (valids, c);
5471       if (NILP (val))
5472         break;
5473       if (c >= 0x80)
5474         {
5475           if (c < 0xA0
5476               && check_latin_extra
5477               && (!VECTORP (Vlatin_extra_code_table)
5478                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5479             break;
5480           found = CATEGORY_MASK_CHARSET;
5481         }
5482       if (INTEGERP (val))
5483         {
5484           charset = CHARSET_FROM_ID (XFASTINT (val));
5485           dim = CHARSET_DIMENSION (charset);
5486           for (idx = 1; idx < dim; idx++)
5487             {
5488               if (src == src_end)
5489                 goto too_short;
5490               ONE_MORE_BYTE (c);
5491               if (c < charset->code_space[(dim - 1 - idx) * 2]
5492                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5493                 break;
5494             }
5495           if (idx < dim)
5496             break;
5497         }
5498       else
5499         {
5500           idx = 1;
5501           for (; CONSP (val); val = XCDR (val))
5502             {
5503               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5504               dim = CHARSET_DIMENSION (charset);
5505               while (idx < dim)
5506                 {
5507                   if (src == src_end)
5508                     goto too_short;
5509                   ONE_MORE_BYTE (c);
5510                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5511                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5512                     break;
5513                   idx++;
5514                 }
5515               if (idx == dim)
5516                 {
5517                   val = Qnil;
5518                   break;
5519                 }
5520             }
5521           if (CONSP (val))
5522             break;
5523         }
5524     }
5525  too_short:
5526   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5527   return 0;
5528
5529  no_more_source:
5530   detect_info->found |= found;
5531   return 1;
5532 }
5533
/* Decode the bytes of CODING->source, starting at CODING->consumed,
   according to the charset-based coding system of CODING, appending
   the resulting characters to CODING->charbuf.

   Each leading byte is looked up in the `charset_valids' attribute,
   which gives either one charset ID or a list of candidate charset
   IDs.  Bytes that cannot be decoded are stored individually and
   counted in CODING->errors.  On exit CODING->consumed_char,
   CODING->consumed and CODING->charbuf_used are updated.

   NOTE(review): ONE_MORE_BYTE, CODING_DECODE_CHAR and
   ADD_CHARSET_DATA are macros defined elsewhere.  ONE_MORE_BYTE
   presumably jumps to `no_more_source' when the source runs out and
   uses the locals `src', `src_end', `src_base', `multibytep' and
   `consumed_chars' -- confirm against the macro definitions.  */
static void
decode_coding_charset (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* We may produce one charset annotation in one loop and one more at
     the end.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
  int consumed_chars = 0, consumed_chars_base;
  int multibytep = coding->src_multibyte;
  Lisp_Object attrs, charset_list, valids;
  int char_offset = coding->produced_char;
  /* LAST_ID is the charset of the current run of characters and
     LAST_OFFSET the character offset at which that run started.  */
  int last_offset = char_offset;
  int last_id = charset_ascii;
  int eol_crlf =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR, or -1 if there is none pending.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attrs, charset_list);
  valids = AREF (attrs, coding_attr_charset_valids);

  while (1)
    {
      int c;
      Lisp_Object val;
      struct charset *charset;
      int dim;
      int len = 1;
      unsigned code;

      /* Remember where this character starts, so that the consumed
         counts stored at the end cover complete characters only.  */
      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* CHARBUF is full.  If a byte was read ahead after a CR,
             step SRC_BASE back so that byte is not reported as
             consumed.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        {
          /* Process the byte that was read ahead last time.  */
          c = byte_after_cr;
          byte_after_cr = -1;
        }
      else
        {
          ONE_MORE_BYTE (c);
          /* With DOS end-of-line, also fetch the byte following a CR
             right away; it is handled on the next iteration.  */
          if (eol_crlf && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
        }
      if (c < 0)
        goto invalid_code;
      code = c;

      val = AREF (valids, c);
      if (! INTEGERP (val) && ! CONSP (val))
        goto invalid_code;
      if (INTEGERP (val))
        {
          /* Exactly one charset is possible for this leading byte:
             collect the remaining bytes of the code point.  */
          charset = CHARSET_FROM_ID (XFASTINT (val));
          dim = CHARSET_DIMENSION (charset);
          while (len < dim)
            {
              ONE_MORE_BYTE (c);
              code = (code << 8) | c;
              len++;
            }
          CODING_DECODE_CHAR (coding, src, src_base, src_end,
                              charset, code, c);
        }
      else
        {
          /* VAL is a list of charset IDs.  It is assured that the
             list is sorted by charset dimensions (smaller one
             comes first).  */
          while (CONSP (val))
            {
              charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
              dim = CHARSET_DIMENSION (charset);
              /* LEN and CODE carry over between candidates, so only
                 the additional bytes a larger charset needs are
                 read.  */
              while (len < dim)
                {
                  ONE_MORE_BYTE (c);
                  code = (code << 8) | c;
                  len++;
                }
              CODING_DECODE_CHAR (coding, src, src_base,
                                  src_end, charset, code, c);
              if (c >= 0)
                break;
              val = XCDR (val);
            }
        }
      if (c < 0)
        goto invalid_code;
      if (charset->id != charset_ascii
          && last_id != charset->id)
        {
          /* A new non-ASCII charset run starts here: annotate the
             run that just ended, if it was non-ASCII.  */
          if (last_id != charset_ascii)
            ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
          last_id = charset->id;
          last_offset = char_offset;
        }

      *charbuf++ = c;
      char_offset++;
      continue;

    invalid_code:
      /* Rewind to the start of the failed character and store just its
         first byte: a negative value is negated, an ASCII byte is kept
         as is, and any other byte is converted with BYTE8_TO_CHAR.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
      char_offset++;
      coding->errors++;
    }

 no_more_source:
  /* Annotate the last run if it was non-ASCII, then publish how far
     we got.  */
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
5661
5662 static int
5663 encode_coding_charset (struct coding_system *coding)
5664 {
5665   int multibytep = coding->dst_multibyte;
5666   int *charbuf = coding->charbuf;
5667   int *charbuf_end = charbuf + coding->charbuf_used;
5668   unsigned char *dst = coding->destination + coding->produced;
5669   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5670   int safe_room = MAX_MULTIBYTE_LENGTH;
5671   int produced_chars = 0;
5672   Lisp_Object attrs, charset_list;
5673   int ascii_compatible;
5674   int c;
5675
5676   CODING_GET_INFO (coding, attrs, charset_list);
5677   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5678
5679   while (charbuf < charbuf_end)
5680     {
5681       struct charset *charset;
5682       unsigned code;
5683
5684       ASSURE_DESTINATION (safe_room);
5685       c = *charbuf++;
5686       if (ascii_compatible && ASCII_CHAR_P (c))
5687         EMIT_ONE_ASCII_BYTE (c);
5688       else if (CHAR_BYTE8_P (c))
5689         {
5690           c = CHAR_TO_BYTE8 (c);
5691           EMIT_ONE_BYTE (c);
5692         }
5693       else
5694         {
5695           charset = char_charset (c, charset_list, &code);
5696           if (charset)
5697             {
5698               if (CHARSET_DIMENSION (charset) == 1)
5699                 EMIT_ONE_BYTE (code);
5700               else if (CHARSET_DIMENSION (charset) == 2)
5701                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5702               else if (CHARSET_DIMENSION (charset) == 3)
5703                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5704               else
5705                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5706                                  (code >> 8) & 0xFF, code & 0xFF);
5707             }
5708           else
5709             {
5710               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5711                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5712               else
5713                 c = coding->default_char;
5714               EMIT_ONE_BYTE (c);
5715             }
5716         }
5717     }
5718
5719   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5720   coding->produced_char += produced_chars;
5721   coding->produced = dst - coding->destination;
5722   return 0;
5723 }
5724
5725 \f
/*** 10. C library functions ***/
5727
/* Setup coding context CODING from information about CODING_SYSTEM.
   If CODING_SYSTEM is nil, `undecided' is assumed.  If
   CODING_SYSTEM is invalid, signal an error.

   This fills in CODING->id, the mode and common flags, the safe
   charset table, the default character, and the detector, decoder
   and encoder function pointers appropriate for the type of the
   coding system, plus the type-specific state reached through the
   CODING_ISO_*, CODING_UTF_* and CODING->spec accessors.  */

void
setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
{
  Lisp_Object attrs;
  Lisp_Object eol_type;
  Lisp_Object coding_type;
  Lisp_Object val;

  if (NILP (coding_system))
    coding_system = Qundecided;

  CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);

  attrs = CODING_ID_ATTRS (coding->id);
  /* With eol conversion inhibited, behave as if the eol type were
     unix.  */
  eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);

  coding->mode = 0;
  coding->head_ascii = -1;
  /* Base flags derived from the eol type: a vector requires decoding
     and detection, a fixed non-unix type requires decoding and
     encoding, and unix requires nothing.  */
  if (VECTORP (eol_type))
    coding->common_flags = (CODING_REQUIRE_DECODING_MASK
                            | CODING_REQUIRE_DETECTION_MASK);
  else if (! EQ (eol_type, Qunix))
    coding->common_flags = (CODING_REQUIRE_DECODING_MASK
                            | CODING_REQUIRE_ENCODING_MASK);
  else
    coding->common_flags = 0;
  if (! NILP (CODING_ATTR_POST_READ (attrs)))
    coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
  if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
    coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
  if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
    coding->common_flags |= CODING_FOR_UNIBYTE_MASK;

  val = CODING_ATTR_SAFE_CHARSETS (attrs);
  coding->max_charset_id = SCHARS (val) - 1;
  coding->safe_charsets = SDATA (val);
  coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
  coding->carryover_bytes = 0;

  /* Install the per-type handlers and type-specific state.  */
  coding_type = CODING_ATTR_TYPE (attrs);
  if (EQ (coding_type, Qundecided))
    {
      /* Undecided uses the raw-text handlers and always requires
         detection.  */
      coding->detector = NULL;
      coding->decoder = decode_coding_raw_text;
      coding->encoder = encode_coding_raw_text;
      coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
    }
  else if (EQ (coding_type, Qiso_2022))
    {
      int i;
      int flags = XINT (AREF (attrs, coding_attr_iso_flags));

      /* Invoke graphic register 0 to plane 0.  */
      CODING_ISO_INVOCATION (coding, 0) = 0;
      /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
      CODING_ISO_INVOCATION (coding, 1)
        = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
      /* Setup the initial status of designation.  */
      for (i = 0; i < 4; i++)
        CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
      /* Not single shifting initially.  */
      CODING_ISO_SINGLE_SHIFTING (coding) = 0;
      /* Beginning of buffer should also be regarded as bol. */
      CODING_ISO_BOL (coding) = 1;
      coding->detector = detect_coding_iso_2022;
      coding->decoder = decode_coding_iso_2022;
      coding->encoder = encode_coding_iso_2022;
      if (flags & CODING_ISO_FLAG_SAFE)
        coding->mode |= CODING_MODE_SAFE_ENCODING;
      coding->common_flags
        |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
            | CODING_REQUIRE_FLUSHING_MASK);
      if (flags & CODING_ISO_FLAG_COMPOSITION)
        coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
      if (flags & CODING_ISO_FLAG_DESIGNATION)
        coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
      if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
        {
          /* setup_iso_safe_charsets is called before the safe-charsets
             attribute is read again, so the values stored above are
             refreshed from it.  */
          setup_iso_safe_charsets (attrs);
          val = CODING_ATTR_SAFE_CHARSETS (attrs);
          coding->max_charset_id = SCHARS (val) - 1;
          coding->safe_charsets = SDATA (val);
        }
      CODING_ISO_FLAGS (coding) = flags;
      CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
      CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
      CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
      CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
    }
  else if (EQ (coding_type, Qcharset))
    {
      coding->detector = detect_coding_charset;
      coding->decoder = decode_coding_charset;
      coding->encoder = encode_coding_charset;
      coding->common_flags
        |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
    }
  else if (EQ (coding_type, Qutf_8))
    {
      /* The BOM attribute is a cons (detect), t (with BOM), or
         anything else (without BOM).  */
      val = AREF (attrs, coding_attr_utf_bom);
      CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
                                   : EQ (val, Qt) ? utf_with_bom
                                   : utf_without_bom);
      coding->detector = detect_coding_utf_8;
      coding->decoder = decode_coding_utf_8;
      coding->encoder = encode_coding_utf_8;
      coding->common_flags
        |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
      if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
        coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
    }
  else if (EQ (coding_type, Qutf_16))
    {
      /* Same BOM convention as for utf-8 above.  */
      val = AREF (attrs, coding_attr_utf_bom);
      CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
                                    : EQ (val, Qt) ? utf_with_bom
                                    : utf_without_bom);
      /* Any endian attribute other than `big' selects little
         endian.  */
      val = AREF (attrs, coding_attr_utf_16_endian);
      CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
                                       : utf_16_little_endian);
      CODING_UTF_16_SURROGATE (coding) = 0;
      coding->detector = detect_coding_utf_16;
      coding->decoder = decode_coding_utf_16;
      coding->encoder = encode_coding_utf_16;
      coding->common_flags
        |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
      if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
        coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
    }
  else if (EQ (coding_type, Qccl))
    {
      coding->detector = detect_coding_ccl;
      coding->decoder = decode_coding_ccl;
      coding->encoder = encode_coding_ccl;
      coding->common_flags
        |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
            | CODING_REQUIRE_FLUSHING_MASK);
    }
  else if (EQ (coding_type, Qemacs_mule))
    {
      coding->detector = detect_coding_emacs_mule;
      coding->decoder = decode_coding_emacs_mule;
      coding->encoder = encode_coding_emacs_mule;
      coding->common_flags
        |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
      coding->spec.emacs_mule.full_support = 1;
      if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
          && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
        {
          /* Build a safe-charsets table from Vemacs_mule_charset_list:
             every entry starts as 255 and the entries for the listed
             charset IDs are set to 0.  */
          Lisp_Object tail, safe_charsets;
          int max_charset_id = 0;

          for (tail = Vemacs_mule_charset_list; CONSP (tail);
               tail = XCDR (tail))
            if (max_charset_id < XFASTINT (XCAR (tail)))
              max_charset_id = XFASTINT (XCAR (tail));
          safe_charsets = make_uninit_string (max_charset_id + 1);
          memset (SDATA (safe_charsets), 255, max_charset_id + 1);
          for (tail = Vemacs_mule_charset_list; CONSP (tail);
               tail = XCDR (tail))
            SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
          coding->max_charset_id = max_charset_id;
          coding->safe_charsets = SDATA (safe_charsets);
          /* NOTE(review): full_support was already set to 1 just
             before this `if', so this assignment has no effect --
             confirm whether the earlier one was meant to be 0.  */
          coding->spec.emacs_mule.full_support = 1;
        }
      coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
      coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
    }
  else if (EQ (coding_type, Qshift_jis))
    {
      coding->detector = detect_coding_sjis;
      coding->decoder = decode_coding_sjis;
      coding->encoder = encode_coding_sjis;
      coding->common_flags
        |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
    }
  else if (EQ (coding_type, Qbig5))
    {
      coding->detector = detect_coding_big5;
      coding->decoder = decode_coding_big5;
      coding->encoder = encode_coding_big5;
      coding->common_flags
        |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
    }
  else                          /* EQ (coding_type, Qraw_text) */
    {
      coding->detector = NULL;
      coding->decoder = decode_coding_raw_text;
      coding->encoder = encode_coding_raw_text;
      /* Raw text needs conversion only for end-of-line handling:
         decoding for any non-unix eol type, and encoding only when
         the eol type is fixed (not a vector).  */
      if (! EQ (eol_type, Qunix))
        {
          coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
          if (! VECTORP (eol_type))
            coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
        }

    }

  return;
}
5932
5933 /* Return a list of charsets supported by CODING.  */
5934
5935 Lisp_Object
5936 coding_charset_list (struct coding_system *coding)
5937 {
5938   Lisp_Object attrs, charset_list;
5939
5940   CODING_GET_INFO (coding, attrs, charset_list);
5941   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5942     {
5943       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5944
5945       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5946         charset_list = Viso_2022_charset_list;
5947     }
5948   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5949     {
5950       charset_list = Vemacs_mule_charset_list;
5951     }
5952   return charset_list;
5953 }
5954
5955
5956 /* Return a list of charsets supported by CODING-SYSTEM.  */
5957
5958 Lisp_Object
5959 coding_system_charset_list (Lisp_Object coding_system)
5960 {
5961   int id;
5962   Lisp_Object attrs, charset_list;
5963
5964   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5965   attrs = CODING_ID_ATTRS (id);
5966
5967   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5968     {
5969       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5970
5971       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5972         charset_list = Viso_2022_charset_list;
5973       else
5974         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5975     }
5976   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5977     {
5978       charset_list = Vemacs_mule_charset_list;
5979     }
5980   else
5981     {
5982       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5983     }
5984   return charset_list;
5985 }
5986
5987
5988 /* Return raw-text or one of its subsidiaries that has the same
5989    eol_type as CODING-SYSTEM.  */
5990
5991 Lisp_Object
5992 raw_text_coding_system (Lisp_Object coding_system)
5993 {
5994   Lisp_Object spec, attrs;
5995   Lisp_Object eol_type, raw_text_eol_type;
5996
5997   if (NILP (coding_system))
5998     return Qraw_text;
5999   spec = CODING_SYSTEM_SPEC (coding_system);
6000   attrs = AREF (spec, 0);
6001
6002   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6003     return coding_system;
6004
6005   eol_type = AREF (spec, 2);
6006   if (VECTORP (eol_type))
6007     return Qraw_text;
6008   spec = CODING_SYSTEM_SPEC (Qraw_text);
6009   raw_text_eol_type = AREF (spec, 2);
6010   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6011           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6012           : AREF (raw_text_eol_type, 2));
6013 }
6014
6015
6016 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6017    does, return one of the subsidiary that has the same eol-spec as
6018    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
6019    inherit end-of-line format from the system's setting
6020    (system_eol_type).  */
6021
6022 Lisp_Object
6023 coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
6024 {
6025   Lisp_Object spec, eol_type;
6026
6027   if (NILP (coding_system))
6028     coding_system = Qraw_text;
6029   spec = CODING_SYSTEM_SPEC (coding_system);
6030   eol_type = AREF (spec, 2);
6031   if (VECTORP (eol_type))
6032     {
6033       Lisp_Object parent_eol_type;
6034
6035       if (! NILP (parent))
6036         {
6037           Lisp_Object parent_spec;
6038
6039           parent_spec = CODING_SYSTEM_SPEC (parent);
6040           parent_eol_type = AREF (parent_spec, 2);
6041         }
6042       else
6043         parent_eol_type = system_eol_type;
6044       if (EQ (parent_eol_type, Qunix))
6045         coding_system = AREF (eol_type, 0);
6046       else if (EQ (parent_eol_type, Qdos))
6047         coding_system = AREF (eol_type, 1);
6048       else if (EQ (parent_eol_type, Qmac))
6049         coding_system = AREF (eol_type, 2);
6050     }
6051   return coding_system;
6052 }
6053
6054 /* Emacs has a mechanism to automatically detect a coding system if it
6055    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
6056    it's impossible to distinguish some coding systems accurately
6057    because they use the same range of codes.  So, at first, coding
   systems are categorized into the following categories:
6059
6060    o coding-category-emacs-mule
6061
6062         The category for a coding system which has the same code range
6063         as Emacs' internal format.  Assigned the coding-system (Lisp
6064         symbol) `emacs-mule' by default.
6065
6066    o coding-category-sjis
6067
6068         The category for a coding system which has the same code range
6069         as SJIS.  Assigned the coding-system (Lisp
6070         symbol) `japanese-shift-jis' by default.
6071
6072    o coding-category-iso-7
6073
6074         The category for a coding system which has the same code range
6075         as ISO2022 of 7-bit environment.  This doesn't use any locking
6076         shift and single shift functions.  This can encode/decode all
6077         charsets.  Assigned the coding-system (Lisp symbol)
6078         `iso-2022-7bit' by default.
6079
6080    o coding-category-iso-7-tight
6081
6082         Same as coding-category-iso-7 except that this can
6083         encode/decode only the specified charsets.
6084
6085    o coding-category-iso-8-1
6086
6087         The category for a coding system which has the same code range
6088         as ISO2022 of 8-bit environment and graphic plane 1 used only
6089         for DIMENSION1 charset.  This doesn't use any locking shift
6090         and single shift functions.  Assigned the coding-system (Lisp
6091         symbol) `iso-latin-1' by default.
6092
6093    o coding-category-iso-8-2
6094
6095         The category for a coding system which has the same code range
6096         as ISO2022 of 8-bit environment and graphic plane 1 used only
6097         for DIMENSION2 charset.  This doesn't use any locking shift
6098         and single shift functions.  Assigned the coding-system (Lisp
6099         symbol) `japanese-iso-8bit' by default.
6100
6101    o coding-category-iso-7-else
6102
6103         The category for a coding system which has the same code range
        as ISO2022 of 7-bit environment but uses locking shift or
6105         single shift functions.  Assigned the coding-system (Lisp
6106         symbol) `iso-2022-7bit-lock' by default.
6107
6108    o coding-category-iso-8-else
6109
6110         The category for a coding system which has the same code range
        as ISO2022 of 8-bit environment but uses locking shift or
6112         single shift functions.  Assigned the coding-system (Lisp
6113         symbol) `iso-2022-8bit-ss2' by default.
6114
6115    o coding-category-big5
6116
6117         The category for a coding system which has the same code range
6118         as BIG5.  Assigned the coding-system (Lisp symbol)
6119         `cn-big5' by default.
6120
6121    o coding-category-utf-8
6122
6123         The category for a coding system which has the same code range
6124         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6125         symbol) `utf-8' by default.
6126
6127    o coding-category-utf-16-be
6128
6129         The category for a coding system in which a text has an
6130         Unicode signature (cf. Unicode Standard) in the order of BIG
6131         endian at the head.  Assigned the coding-system (Lisp symbol)
6132         `utf-16-be' by default.
6133
6134    o coding-category-utf-16-le
6135
6136         The category for a coding system in which a text has an
6137         Unicode signature (cf. Unicode Standard) in the order of
6138         LITTLE endian at the head.  Assigned the coding-system (Lisp
6139         symbol) `utf-16-le' by default.
6140
6141    o coding-category-ccl
6142
6143         The category for a coding system of which encoder/decoder is
6144         written in CCL programs.  The default value is nil, i.e., no
6145         coding system is assigned.
6146
6147    o coding-category-binary
6148
6149         The category for a coding system not categorized in any of the
6150         above.  Assigned the coding-system (Lisp symbol)
6151         `no-conversion' by default.
6152
6153    Each of them is a Lisp symbol and the value is an actual
6154    `coding-system's (this is also a Lisp symbol) assigned by a user.
6155    What Emacs does actually is to detect a category of coding system.
6156    Then, it uses a `coding-system' assigned to it.  If Emacs can't
6157    decide only one possible category, it selects a category of the
6158    highest priority.  Priorities of categories are also specified by a
6159    user in a Lisp variable `coding-category-list'.
6160
6161 */
6162
6163 #define EOL_SEEN_NONE   0
6164 #define EOL_SEEN_LF     1
6165 #define EOL_SEEN_CR     2
6166 #define EOL_SEEN_CRLF   4
6167
6168 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6169    SOURCE is encoded.  If CATEGORY is one of
6170    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6171    two-byte, else they are encoded by one-byte.
6172
6173    Return one of EOL_SEEN_XXX.  */
6174
6175 #define MAX_EOL_CHECK_COUNT 3
6176
6177 static int
6178 detect_eol (const unsigned char *source, EMACS_INT src_bytes,
6179             enum coding_category category)
6180 {
6181   const unsigned char *src = source, *src_end = src + src_bytes;
6182   unsigned char c;
6183   int total  = 0;
6184   int eol_seen = EOL_SEEN_NONE;
6185
6186   if ((1 << category) & CATEGORY_MASK_UTF_16)
6187     {
6188       int msb, lsb;
6189
6190       msb = category == (coding_category_utf_16_le
6191                          | coding_category_utf_16_le_nosig);
6192       lsb = 1 - msb;
6193
6194       while (src + 1 < src_end)
6195         {
6196           c = src[lsb];
6197           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6198             {
6199               int this_eol;
6200
6201               if (c == '\n')
6202                 this_eol = EOL_SEEN_LF;
6203               else if (src + 3 >= src_end
6204                        || src[msb + 2] != 0
6205                        || src[lsb + 2] != '\n')
6206                 this_eol = EOL_SEEN_CR;
6207               else
6208                 {
6209                   this_eol = EOL_SEEN_CRLF;
6210                   src += 2;
6211                 }
6212
6213               if (eol_seen == EOL_SEEN_NONE)
6214                 /* This is the first end-of-line.  */
6215                 eol_seen = this_eol;
6216               else if (eol_seen != this_eol)
6217                 {
6218                   /* The found type is different from what found before.
6219                      Allow for stray ^M characters in DOS EOL files.  */
6220                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6221                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6222                     eol_seen = EOL_SEEN_CRLF;
6223                   else
6224                     {
6225                       eol_seen = EOL_SEEN_LF;
6226                       break;
6227                     }
6228                 }
6229               if (++total == MAX_EOL_CHECK_COUNT)
6230                 break;
6231             }
6232           src += 2;
6233         }
6234     }
6235   else
6236     {
6237       while (src < src_end)
6238         {
6239           c = *src++;
6240           if (c == '\n' || c == '\r')
6241             {
6242               int this_eol;
6243
6244               if (c == '\n')
6245                 this_eol = EOL_SEEN_LF;
6246               else if (src >= src_end || *src != '\n')
6247                 this_eol = EOL_SEEN_CR;
6248               else
6249                 this_eol = EOL_SEEN_CRLF, src++;
6250
6251               if (eol_seen == EOL_SEEN_NONE)
6252                 /* This is the first end-of-line.  */
6253                 eol_seen = this_eol;
6254               else if (eol_seen != this_eol)
6255                 {
6256                   /* The found type is different from what found before.
6257                      Allow for stray ^M characters in DOS EOL files.  */
6258                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6259                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6260                     eol_seen = EOL_SEEN_CRLF;
6261                   else
6262                     {
6263                       eol_seen = EOL_SEEN_LF;
6264                       break;
6265                     }
6266                 }
6267               if (++total == MAX_EOL_CHECK_COUNT)
6268                 break;
6269             }
6270         }
6271     }
6272   return eol_seen;
6273 }
6274
6275
6276 static Lisp_Object
6277 adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6278 {
6279   Lisp_Object eol_type;
6280
6281   eol_type = CODING_ID_EOL_TYPE (coding->id);
6282   if (eol_seen & EOL_SEEN_LF)
6283     {
6284       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6285       eol_type = Qunix;
6286     }
6287   else if (eol_seen & EOL_SEEN_CRLF)
6288     {
6289       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6290       eol_type = Qdos;
6291     }
6292   else if (eol_seen & EOL_SEEN_CR)
6293     {
6294       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6295       eol_type = Qmac;
6296     }
6297   return eol_type;
6298 }
6299
6300 /* Detect how a text specified in CODING is encoded.  If a coding
6301    system is detected, update fields of CODING by the detected coding
6302    system.

        The text examined is the CODING->src_bytes bytes at
        CODING->source.  The following kinds of coding system are
        handled:

        - type `undecided': the source is scanned, the coding categories
          are tried in the order given by coding_priorities, and CODING
          is set up for the first category found (or else for
          no-conversion, raw-text, or the first category that was not
          rejected).

        - category utf-8-auto or utf-16-auto: the matching detector is
          run and CODING is set up from the car or the cdr of the cons
          stored in the coding_attr_utf_bom attribute.

        Any other coding system is left as it is.  CODING->consumed,
        consumed_char, produced, produced_char and head_ascii are reset
        on entry, and CODING->mode is restored before returning.  */
6303
6304 void
6305 detect_coding (struct coding_system *coding)
6306 {
6307   const unsigned char *src, *src_end;
6308   int saved_mode = coding->mode;
6309
6310   coding->consumed = coding->consumed_char = 0;
6311   coding->produced = coding->produced_char = 0;
6312   coding_set_source (coding);
6313
6314   src_end = coding->source + coding->src_bytes;
6315   coding->head_ascii = 0;
6316
6317   /* If we have not yet decided the text encoding type, detect it
6318      now.  */
6319   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6320     {
6321       int c, i;
6322       struct coding_detection_info detect_info;
6323       int null_byte_found = 0, eight_bit_found = 0;
6324
6325       detect_info.checked = detect_info.found = detect_info.rejected = 0;
           /* Scan the source byte by byte.  head_ascii counts the 7-bit
              bytes seen before the first 8-bit byte.  The scan stops
              early once both a null byte and an 8-bit byte have been
              seen, or when detect_coding_iso_2022 returns nonzero.  */
6326       for (src = coding->source; src < src_end; src++)
6327         {
6328           c = *src;
6329           if (c & 0x80)
6330             {
6331               eight_bit_found = 1;
6332               if (null_byte_found)
6333                 break;
6334             }
6335           else if (c < 0x20)
6336             {
                   /* The ISO-2022 detector is tried at most once, on the
                      first ESC, SI or SO, unless escape detection is
                      inhibited.  */
6337               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6338                   && ! inhibit_iso_escape_detection
6339                   && ! detect_info.checked)
6340                 {
6341                   if (detect_coding_iso_2022 (coding, &detect_info))
6342                     {
6343                       /* We have scanned the whole data.  */
6344                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6345                         {
6346                           /* We didn't find an 8-bit code.  We may
6347                              have found a null-byte, but it's very
6348                              rare that a binary file conforms to
6349                              ISO-2022.  */
6350                           src = src_end;
6351                           coding->head_ascii = src - coding->source;
6352                         }
6353                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6354                       break;
6355                     }
6356                 }
6357               else if (! c && !inhibit_null_byte_detection)
6358                 {
6359                   null_byte_found = 1;
6360                   if (eight_bit_found)
6361                     break;
6362                 }
6363               if (! eight_bit_found)
6364                 coding->head_ascii++;
6365             }
6366           else if (! eight_bit_found)
6367             coding->head_ascii++;
6368         }
6369
           /* When the whole source is plain 7-bit text and no detector
              found anything, none of the conditions below holds and
              CODING is left undecided.  */
6370       if (null_byte_found || eight_bit_found
6371           || coding->head_ascii < coding->src_bytes
6372           || detect_info.found)
6373         {
6374           enum coding_category category;
6375           struct coding_system *this;
6376
6377           if (coding->head_ascii == coding->src_bytes)
6378             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6379             for (i = 0; i < coding_category_raw_text; i++)
6380               {
6381                 category = coding_priorities[i];
6382                 this = coding_categories + category;
6383                 if (detect_info.found & (1 << category))
6384                   break;
6385               }
6386           else
6387             {
                   /* A null byte marks every category outside
                      CATEGORY_MASK_UTF_16 as already checked and
                      rejected, so only those detectors run below.  */
6388               if (null_byte_found)
6389                 {
6390                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6391                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6392                 }
                   /* Try the categories in priority order, running the
                      detector of each category that has not been checked
                      yet, and stop at the first one that is found.  */
6393               for (i = 0; i < coding_category_raw_text; i++)
6394                 {
6395                   category = coding_priorities[i];
6396                   this = coding_categories + category;
6397                   if (this->id < 0)
6398                     {
6399                       /* No coding system of this category is defined.  */
6400                       detect_info.rejected |= (1 << category);
6401                     }
6402                   else if (category >= coding_category_raw_text)
6403                     continue;
6404                   else if (detect_info.checked & (1 << category))
6405                     {
6406                       if (detect_info.found & (1 << category))
6407                         break;
6408                     }
6409                   else if ((*(this->detector)) (coding, &detect_info)
6410                            && detect_info.found & (1 << category))
6411                     {
                           /* NOTE(review): only CATEGORY is changed
                              here; THIS, which selects the coding system
                              set up after the loop, still refers to the
                              utf-16-auto category, and CATEGORY is not
                              read again.  Confirm this is intended.  */
6412                       if (category == coding_category_utf_16_auto)
6413                         {
6414                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6415                             category = coding_category_utf_16_le;
6416                           else
6417                             category = coding_category_utf_16_be;
6418                         }
6419                       break;
6420                     }
6421                 }
6422             }
6423
               /* I is below coding_category_raw_text exactly when one of
                  the loops above stopped at a found category; THIS is
                  then that category.  */
6424           if (i < coding_category_raw_text)
6425             setup_coding_system (CODING_ID_NAME (this->id), coding);
6426           else if (null_byte_found)
6427             setup_coding_system (Qno_conversion, coding);
6428           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6429                    == CATEGORY_MASK_ANY)
6430             setup_coding_system (Qraw_text, coding);
               /* Otherwise fall back to the first category, in priority
                  order, that was not rejected.  */
6431           else if (detect_info.rejected)
6432             for (i = 0; i < coding_category_raw_text; i++)
6433               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6434                 {
6435                   this = coding_categories + coding_priorities[i];
6436                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6437                   break;
6438                 }
6439         }
6440     }
6441   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6442            == coding_category_utf_8_auto)
6443     {
6444       Lisp_Object coding_systems;
6445       struct coding_detection_info detect_info;
6446
           /* The attribute is used only if it is a cons: its car is the
              coding system to set up when CATEGORY_MASK_UTF_8_SIG was
              found, its cdr the one to set up otherwise.  */
6447       coding_systems
6448         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6449       detect_info.found = detect_info.rejected = 0;
6450       coding->head_ascii = 0;
6451       if (CONSP (coding_systems)
6452           && detect_coding_utf_8 (coding, &detect_info))
6453         {
6454           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6455             setup_coding_system (XCAR (coding_systems), coding);
6456           else
6457             setup_coding_system (XCDR (coding_systems), coding);
6458         }
6459     }
6460   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6461            == coding_category_utf_16_auto)
6462     {
6463       Lisp_Object coding_systems;
6464       struct coding_detection_info detect_info;
6465
           /* As above, but the car is used when CATEGORY_MASK_UTF_16_LE
              was found and the cdr when CATEGORY_MASK_UTF_16_BE was
              found; if neither was found CODING is not changed.  */
6466       coding_systems
6467         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6468       detect_info.found = detect_info.rejected = 0;
6469       coding->head_ascii = 0;
6470       if (CONSP (coding_systems)
6471           && detect_coding_utf_16 (coding, &detect_info))
6472         {
6473           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6474             setup_coding_system (XCAR (coding_systems), coding);
6475           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6476             setup_coding_system (XCDR (coding_systems), coding);
6477         }
6478     }
6479   coding->mode = saved_mode;
6480 }
6481
6482
6483 static void
6484 decode_eol (struct coding_system *coding)
6485 {
6486   Lisp_Object eol_type;
6487   unsigned char *p, *pbeg, *pend;
6488
6489   eol_type = CODING_ID_EOL_TYPE (coding->id);
6490   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6491     return;
6492
6493   if (NILP (coding->dst_object))
6494     pbeg = coding->destination;
6495   else
6496     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6497   pend = pbeg + coding->produced;
6498
6499   if (VECTORP (eol_type))
6500     {
6501       int eol_seen = EOL_SEEN_NONE;
6502
6503       for (p = pbeg; p < pend; p++)
6504         {
6505           if (*p == '\n')
6506             eol_seen |= EOL_SEEN_LF;
6507           else if (*p == '\r')
6508             {
6509               if (p + 1 < pend && *(p + 1) == '\n')
6510                 {
6511                   eol_seen |= EOL_SEEN_CRLF;
6512                   p++;
6513                 }
6514               else
6515                 eol_seen |= EOL_SEEN_CR;
6516             }
6517         }
6518       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6519       if ((eol_seen & EOL_SEEN_CRLF) != 0
6520           && (eol_seen & EOL_SEEN_CR) != 0
6521           && (eol_seen & EOL_SEEN_LF) == 0)
6522         eol_seen = EOL_SEEN_CRLF;
6523       else if (eol_seen != EOL_SEEN_NONE
6524           && eol_seen != EOL_SEEN_LF
6525           && eol_seen != EOL_SEEN_CRLF
6526           && eol_seen != EOL_SEEN_CR)
6527         eol_seen = EOL_SEEN_LF;
6528       if (eol_seen != EOL_SEEN_NONE)
6529         eol_type = adjust_coding_eol_type (coding, eol_seen);
6530     }
6531
6532   if (EQ (eol_type, Qmac))
6533     {
6534       for (p = pbeg; p < pend; p++)
6535         if (*p == '\r')
6536           *p = '\n';
6537     }
6538   else if (EQ (eol_type, Qdos))
6539     {
6540       int n = 0;
6541
6542       if (NILP (coding->dst_object))
6543         {
6544           /* Start deleting '\r' from the tail to minimize the memory
6545              movement.  */
6546           for (p = pend - 2; p >= pbeg; p--)
6547             if (*p == '\r')
6548               {
6549                 memmove (p, p + 1, pend-- - p - 1);
6550                 n++;
6551               }
6552         }
6553       else
6554         {
6555           int pos_byte = coding->dst_pos_byte;
6556           int pos = coding->dst_pos;
6557           int pos_end = pos + coding->produced_char - 1;
6558
6559           while (pos < pos_end)
6560             {
6561               p = BYTE_POS_ADDR (pos_byte);
6562               if (*p == '\r' && p[1] == '\n')
6563                 {
6564                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6565                   n++;
6566                   pos_end--;
6567                 }
6568               pos++;
6569               if (coding->dst_multibyte)
6570                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6571               else
6572                 pos_byte++;
6573             }
6574         }
6575       coding->produced -= n;
6576       coding->produced_char -= n;
6577     }
6578 }
6579
6580
6581 /* Return a translation table (or list of them) from coding system
6582    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6583    decoding (ENCODEP is zero). */
6584
6585 static Lisp_Object
6586 get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6587 {
6588   Lisp_Object standard, translation_table;
6589   Lisp_Object val;
6590
6591   if (NILP (Venable_character_translation))
6592     {
6593       if (max_lookup)
6594         *max_lookup = 0;
6595       return Qnil;
6596     }
6597   if (encodep)
6598     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6599       standard = Vstandard_translation_table_for_encode;
6600   else
6601     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6602       standard = Vstandard_translation_table_for_decode;
6603   if (NILP (translation_table))
6604     translation_table = standard;
6605   else
6606     {
6607       if (SYMBOLP (translation_table))
6608         translation_table = Fget (translation_table, Qtranslation_table);
6609       else if (CONSP (translation_table))
6610         {
6611           translation_table = Fcopy_sequence (translation_table);
6612           for (val = translation_table; CONSP (val); val = XCDR (val))
6613             if (SYMBOLP (XCAR (val)))
6614               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6615         }
6616       if (CHAR_TABLE_P (standard))
6617         {
6618           if (CONSP (translation_table))
6619             translation_table = nconc2 (translation_table,
6620                                         Fcons (standard, Qnil));
6621           else
6622             translation_table = Fcons (translation_table,
6623                                        Fcons (standard, Qnil));
6624         }
6625     }
6626
6627   if (max_lookup)
6628     {
6629       *max_lookup = 1;
6630       if (CHAR_TABLE_P (translation_table)
6631           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6632         {
6633           val = XCHAR_TABLE (translation_table)->extras[1];
6634           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6635             *max_lookup = XFASTINT (val);
6636         }
6637       else if (CONSP (translation_table))
6638         {
6639           Lisp_Object tail, val;
6640
6641           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6642             if (CHAR_TABLE_P (XCAR (tail))
6643                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6644               {
6645                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6646                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6647                   *max_lookup = XFASTINT (val);
6648               }
6649         }
6650     }
6651   return translation_table;
6652 }
6653
     /* Look up character C in TABLE, which is a char-table or a list
        whose char-table elements are consulted in order, and leave the
        result in C and TRANS.

        If an entry is a character, C is replaced by that character and
        TRANS is set to Qnil; in the list case the lookup then goes on
        with the next table, using the new C.  If an entry is non-nil
        but not a character, TRANS is set to that entry and, in the list
        case, the lookup stops.  If TABLE is neither a char-table nor a
        cons, TRANS is Qnil and C is unchanged.

        C and TRANS are assigned to, so both must be lvalues, and every
        argument is evaluated more than once.  The expansion declares a
        local variable named `tail'.  */
6654 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
6655   do {                                                          \
6656     trans = Qnil;                                               \
6657     if (CHAR_TABLE_P (table))                                   \
6658       {                                                         \
6659         trans = CHAR_TABLE_REF (table, c);                      \
6660         if (CHARACTERP (trans))                                 \
6661           c = XFASTINT (trans), trans = Qnil;                   \
6662       }                                                         \
6663     else if (CONSP (table))                                     \
6664       {                                                         \
6665         Lisp_Object tail;                                       \
6666                                                                 \
6667         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
6668           if (CHAR_TABLE_P (XCAR (tail)))                       \
6669             {                                                   \
6670               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
6671               if (CHARACTERP (trans))                           \
6672                 c = XFASTINT (trans), trans = Qnil;             \
6673               else if (! NILP (trans))                          \
6674                 break;                                          \
6675             }                                                   \
6676       }                                                         \
6677   } while (0)
6678
6679
6680 /* Return a translation of character(s) at BUF according to TRANS.
6681    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6682    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6683    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6684    translation is found, and Qnil if not found..
6685    If BUF is too short to lookup characters in FROM, return Qt.  */
6686
6687 static Lisp_Object
6688 get_translation (Lisp_Object trans, int *buf, int *buf_end)
6689 {
6690
6691   if (INTEGERP (trans))
6692     return trans;
6693   for (; CONSP (trans); trans = XCDR (trans))
6694     {
6695       Lisp_Object val = XCAR (trans);
6696       Lisp_Object from = XCAR (val);
6697       int len = ASIZE (from);
6698       int i;
6699
6700       for (i = 0; i < len; i++)
6701         {
6702           if (buf + i == buf_end)
6703             return Qt;
6704           if (XINT (AREF (from, i)) != buf[i])
6705             break;
6706         }
6707       if (i == len)
6708         return val;
6709     }
6710   return Qnil;
6711 }
6712
6713
     /* Write decoded characters to the destination of CODING, starting
        at CODING->destination + CODING->produced.

        If CODING->chars_at_source is zero, the characters are the
        CODING->charbuf_used elements of CODING->charbuf; each
        non-negative element is translated through TRANSLATION_TABLE and
        stored, and negative elements (annotation data) are skipped.
        Otherwise the CODING->consumed bytes at CODING->source are
        copied, going through ONE_MORE_BYTE or EMIT_ONE_BYTE when the
        source and destination differ in multibyteness.

        LAST_BLOCK nonzero means that a translation which would need
        more characters than CHARBUF holds is not waited for.

        Add the amounts produced to CODING->produced and
        CODING->produced_char, and call insert_from_gap when the
        destination is a buffer and at least one character was
        produced.  Return the number of elements at the end of CHARBUF
        that were left unprocessed (zero when reading from
        CODING->source).  */
6714 static int
6715 produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6716                int last_block)
6717 {
6718   unsigned char *dst = coding->destination + coding->produced;
6719   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6720   EMACS_INT produced;
6721   EMACS_INT produced_chars = 0;
6722   int carryover = 0;
6723
6724   if (! coding->chars_at_source)
6725     {
6726       /* Source characters are in coding->charbuf.  */
6727       int *buf = coding->charbuf;
6728       int *buf_end = buf + coding->charbuf_used;
6729
           /* Source and destination are the same object: the output
              may only grow up to the end of the source bytes already
              consumed.  */
6730       if (EQ (coding->src_object, coding->dst_object))
6731         {
6732           coding_set_source (coding);
6733           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6734         }
6735
6736       while (buf < buf_end)
6737         {
6738           int c = *buf, i;
6739
6740           if (c >= 0)
6741             {
                   /* FROM_NCHARS elements of CHARBUF are replaced by
                      TO_NCHARS characters.  */
6742               int from_nchars = 1, to_nchars = 1;
6743               Lisp_Object trans = Qnil;
6744
6745               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6746               if (! NILP (trans))
6747                 {
6748                   trans = get_translation (trans, buf, buf_end);
6749                   if (INTEGERP (trans))
6750                     c = XINT (trans);
6751                   else if (CONSP (trans))
6752                     {
6753                       from_nchars = ASIZE (XCAR (trans));
6754                       trans = XCDR (trans);
6755                       if (INTEGERP (trans))
6756                         c = XINT (trans);
6757                       else
6758                         {
6759                           to_nchars = ASIZE (trans);
6760                           c = XINT (AREF (trans, 0));
6761                         }
6762                     }
                       /* Qt means CHARBUF is too short to decide.  Unless
                          this is the last block, stop here and report
                          the rest as carryover.  */
6763                   else if (EQ (trans, Qt) && ! last_block)
6764                     break;
6765                 }
6766
                   /* Make room for TO_NCHARS characters of maximum
                      length, then recompute DST_END the same way as
                      before the loop.  */
6767               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6768                 {
6769                   dst = alloc_destination (coding,
6770                                            buf_end - buf
6771                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6772                                            dst);
6773                   if (EQ (coding->src_object, coding->dst_object))
6774                     {
6775                       coding_set_source (coding);
6776                       dst_end = (((unsigned char *) coding->source)
6777                                  + coding->consumed);
6778                     }
6779                   else
6780                     dst_end = coding->destination + coding->dst_bytes;
6781                 }
6782
                   /* C already holds the first character to store; the
                      remaining ones are read from the vector TRANS.  */
6783               for (i = 0; i < to_nchars; i++)
6784                 {
6785                   if (i > 0)
6786                     c = XINT (AREF (trans, i));
6787                   if (coding->dst_multibyte
6788                       || ! CHAR_BYTE8_P (c))
6789                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6790                   else
6791                     *dst++ = CHAR_TO_BYTE8 (c);
6792                 }
6793               produced_chars += to_nchars;
6794               buf += from_nchars;
6795             }
6796           else
6797             /* This is an annotation datum.  (-C) is the length.  */
6798             buf += -c;
6799         }
6800       carryover = buf_end - buf;
6801     }
6802   else
6803     {
6804       /* Source characters are at coding->source.  */
6805       const unsigned char *src = coding->source;
6806       const unsigned char *src_end = src + coding->consumed;
6807
6808       if (EQ (coding->dst_object, coding->src_object))
6809         dst_end = (unsigned char *) src;
6810       if (coding->src_multibyte != coding->dst_multibyte)
6811         {
6812           if (coding->src_multibyte)
6813             {
                   /* NOTE(review): multibytep, consumed_chars and
                      src_base are not referenced by name below;
                      presumably the ONE_MORE_BYTE macro uses them and
                      jumps to no_more_source at the end of the
                      source -- confirm against its definition.  */
6814               int multibytep = 1;
6815               EMACS_INT consumed_chars = 0;
6816
6817               while (1)
6818                 {
6819                   const unsigned char *src_base = src;
6820                   int c;
6821
6822                   ONE_MORE_BYTE (c);
6823                   if (dst == dst_end)
6824                     {
                           /* When converting in place, the bytes read so
                              far may first be reused as room for
                              output.  */
6825                       if (EQ (coding->src_object, coding->dst_object))
6826                         dst_end = (unsigned char *) src;
6827                       if (dst == dst_end)
6828                         {
                               /* Remember SRC as an offset, since the
                                  source pointers are recomputed after
                                  the destination is enlarged.  */
6829                           EMACS_INT offset = src - coding->source;
6830
6831                           dst = alloc_destination (coding, src_end - src + 1,
6832                                                    dst);
6833                           dst_end = coding->destination + coding->dst_bytes;
6834                           coding_set_source (coding);
6835                           src = coding->source + offset;
6836                           src_end = coding->source + coding->src_bytes;
6837                           if (EQ (coding->src_object, coding->dst_object))
6838                             dst_end = (unsigned char *) src;
6839                         }
6840                     }
6841                   *dst++ = c;
6842                   produced_chars++;
6843                 }
6844             no_more_source:
6845               ;
6846             }
6847           else
6848             while (src < src_end)
6849               {
                     /* NOTE(review): multibytep is presumably read by
                        EMIT_ONE_BYTE -- confirm against its
                        definition.  */
6850                 int multibytep = 1;
6851                 int c = *src++;
6852
                     /* Keep room for two bytes per source byte.  */
6853                 if (dst >= dst_end - 1)
6854                   {
6855                     if (EQ (coding->src_object, coding->dst_object))
6856                       dst_end = (unsigned char *) src;
6857                     if (dst >= dst_end - 1)
6858                       {
6859                         EMACS_INT offset = src - coding->source;
6860                         EMACS_INT more_bytes;
6861
6862                         if (EQ (coding->src_object, coding->dst_object))
6863                           more_bytes = ((src_end - src) / 2) + 2;
6864                         else
6865                           more_bytes = src_end - src + 2;
6866                         dst = alloc_destination (coding, more_bytes, dst);
6867                         dst_end = coding->destination + coding->dst_bytes;
6868                         coding_set_source (coding);
6869                         src = coding->source + offset;
6870                         src_end = coding->source + coding->src_bytes;
6871                         if (EQ (coding->src_object, coding->dst_object))
6872                           dst_end = (unsigned char *) src;
6873                       }
6874                   }
6875                 EMIT_ONE_BYTE (c);
6876               }
6877         }
6878       else
6879         {
               /* Same multibyteness on both sides: the bytes are copied
                  unchanged.  With distinct objects the destination is
                  first enlarged by the amount by which the source is
                  larger.  */
6880           if (!EQ (coding->src_object, coding->dst_object))
6881             {
6882               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6883
6884               if (require > 0)
6885                 {
6886                   EMACS_INT offset = src - coding->source;
6887
6888                   dst = alloc_destination (coding, require, dst);
6889                   coding_set_source (coding);
6890                   src = coding->source + offset;
6891                   src_end = coding->source + coding->src_bytes;
6892                 }
6893             }
               /* The character count is the one recorded in
                  CODING->consumed_char.  */
6894           produced_chars = coding->consumed_char;
6895           while (src < src_end)
6896             *dst++ = *src++;
6897         }
6898     }
6899
       /* PRODUCED is the number of bytes stored by this call.  */
6900   produced = dst - (coding->destination + coding->produced);
6901   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6902     insert_from_gap (produced_chars, produced);
6903   coding->produced += produced;
6904   coding->produced_char += produced_chars;
6905   return carryover;
6906 }
6907
6908 /* Compose text in CODING->object according to the annotation data at
6909    CHARBUF.  CHARBUF is an array:
6910      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6911  */
6912
6913 static INLINE void
6914 produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6915 {
6916   int len;
6917   EMACS_INT to;
6918   enum composition_method method;
6919   Lisp_Object components;
6920
6921   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6922   to = pos + charbuf[2];
6923   method = (enum composition_method) (charbuf[4]);
6924
6925   if (method == COMPOSITION_RELATIVE)
6926     components = Qnil;
6927   else
6928     {
6929       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6930       int i, j;
6931
6932       if (method == COMPOSITION_WITH_RULE)
6933         len = charbuf[2] * 3 - 2;
6934       charbuf += MAX_ANNOTATION_LENGTH;
6935       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6936       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6937         {
6938           if (charbuf[i] >= 0)
6939             args[j] = make_number (charbuf[i]);
6940           else
6941             {
6942               i++;
6943               args[j] = make_number (charbuf[i] % 0x100);
6944             }
6945         }
6946       components = (i == j ? Fstring (j, args) : Fvector (j, args));
6947     }
6948   compose_text (pos, to, components, Qnil, coding->dst_object);
6949 }
6950
6951
6952 /* Put `charset' property on text in CODING->object according to
6953    the annotation data at CHARBUF.  CHARBUF is an array:
6954      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6955  */
6956
6957 static INLINE void
6958 produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6959 {
6960   EMACS_INT from = pos - charbuf[2];
6961   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6962
6963   Fput_text_property (make_number (from), make_number (pos),
6964                       Qcharset, CHARSET_NAME (charset),
6965                       coding->dst_object);
6966 }
6967
6968
6969 #define CHARBUF_SIZE 0x4000
6970
     /* Allocate the conversion work area CODING->charbuf with alloca
        and record its size, counted in ints, in CODING->charbuf_size.

        CHARBUF_SIZE ints are requested first; each time alloca yields
        NULL the request is halved, as long as it stays above 1024 ints.
        If no request succeeds, CODING_RESULT_INSUFFICIENT_MEM is
        recorded and the macro executes `return coding->result;' in the
        function that uses it, so it can only be used in a function
        whose return type accepts that value.

        Because the area comes from alloca, it is released when the
        function using the macro returns.

        NOTE(review): alloca normally does not report failure by
        returning NULL (exhausting the stack is undefined behavior), so
        the retry loop and the error return may never take effect --
        confirm for the supported platforms.  */
6971 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
6972   do {                                                                  \
6973     int size = CHARBUF_SIZE;                                            \
6974                                                                         \
6975     coding->charbuf = NULL;                                             \
6976     while (size > 1024)                                                 \
6977       {                                                                 \
6978         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
6979         if (coding->charbuf)                                            \
6980           break;                                                        \
6981         size >>= 1;                                                     \
6982       }                                                                 \
6983     if (! coding->charbuf)                                              \
6984       {                                                                 \
6985         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
6986         return coding->result;                                          \
6987       }                                                                 \
6988     coding->charbuf_size = size;                                        \
6989   } while (0)
6990
6991
6992 static void
6993 produce_annotation (struct coding_system *coding, EMACS_INT pos)
6994 {
6995   int *charbuf = coding->charbuf;
6996   int *charbuf_end = charbuf + coding->charbuf_used;
6997
6998   if (NILP (coding->dst_object))
6999     return;
7000
7001   while (charbuf < charbuf_end)
7002     {
7003       if (*charbuf >= 0)
7004         pos++, charbuf++;
7005       else
7006         {
7007           int len = -*charbuf;
7008
7009           if (len > 2)
7010             switch (charbuf[1])
7011               {
7012               case CODING_ANNOTATE_COMPOSITION_MASK:
7013                 produce_composition (coding, charbuf, pos);
7014                 break;
7015               case CODING_ANNOTATE_CHARSET_MASK:
7016                 produce_charset (coding, charbuf, pos);
7017                 break;
7018               }
7019           charbuf += len;
7020         }
7021     }
7022 }
7023
7024 /* Decode the data at CODING->src_object into CODING->dst_object.
7025    CODING->src_object is a buffer, a string, or nil.
7026    CODING->dst_object is a buffer.
7027
7028    If CODING->src_object is a buffer, it must be the current buffer.
7029    In this case, if CODING->src_pos is positive, it is a position of
7030    the source text in the buffer, otherwise, the source text is in the
7031    gap area of the buffer, and CODING->src_pos specifies the offset of
7032    the text from GPT (which must be the same as PT).  If this is the
7033    same buffer as CODING->dst_object, CODING->src_pos must be
7034    negative.
7035
7036    If CODING->src_object is a string, CODING->src_pos is an index to
7037    that string.
7038
7039    If CODING->src_object is nil, CODING->source must already point to
7040    the non-relocatable memory area.  In this case, CODING->src_pos is
7041    an offset from CODING->source.
7042
7043    The decoded data is inserted at the current point of the buffer
7044    CODING->dst_object.
     
        (The code below also tolerates a CODING->dst_object that is not a
        buffer; in that case no buffer switching and no undo-list
        bookkeeping is done.)
     
        On entry the consumed/produced counters and the error count of
        CODING are reset.  The decoder is then run in rounds, each round
        filling CODING->charbuf and handing it to produce_chars.  The
        rounds repeat while the destination was insufficient, or while
        source bytes remain and the recorded result is success or
        invalid-source.  Source bytes still unconsumed afterwards are
        emitted as raw bytes if CODING_MODE_LAST_BLOCK is set, and are
        otherwise saved in CODING->carryover.  End-of-line decoding is
        applied last.
     
        Return CODING->result.
7045 */
7046
7047 static int
7048 decode_coding (struct coding_system *coding)
7049 {
7050   Lisp_Object attrs;
7051   Lisp_Object undo_list;
7052   Lisp_Object translation_table;
7053   struct ccl_spec cclspec;
7054   int carryover;
7055   int i;
7056
       /* When the source lies in a buffer, starts before the gap and ends
          after it, move the gap to the start of the source.  */
7057   if (BUFFERP (coding->src_object)
7058       && coding->src_pos > 0
7059       && coding->src_pos < GPT
7060       && coding->src_pos + coding->src_chars > GPT)
7061     move_gap_both (coding->src_pos, coding->src_pos_byte);
7062
       /* Make the destination buffer current, put the gap at point, and
          replace its undo list by Qt while decoding (presumably so that
          the individual insertions are not recorded -- the saved list is
          put back at the end of this function, followed by a single
          record_insert).  */
7063   undo_list = Qt;
7064   if (BUFFERP (coding->dst_object))
7065     {
7066       if (current_buffer != XBUFFER (coding->dst_object))
7067         set_buffer_internal (XBUFFER (coding->dst_object));
7068       if (GPT != PT)
7069         move_gap_both (PT, PT_BYTE);
7070       undo_list = current_buffer->undo_list;
7071       current_buffer->undo_list = Qt;
7072     }
7073
       /* Start from a clean conversion state.  */
7074   coding->consumed = coding->consumed_char = 0;
7075   coding->produced = coding->produced_char = 0;
7076   coding->chars_at_source = 0;
7077   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7078   coding->errors = 0;
7079
7080   ALLOC_CONVERSION_WORK_AREA (coding);
7081
7082   attrs = CODING_ID_ATTRS (coding->id);
7083   translation_table = get_translation_table (attrs, 0, NULL);
7084
7085   carryover = 0;
       /* The CCL decoder keeps its program state in CCLSPEC, which lives
          on this function's stack for the duration of the conversion.  */
7086   if (coding->decoder == decode_coding_ccl)
7087     {
7088       coding->spec.ccl = &cclspec;
7089       setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7090     }
       /* One round: the decoder fills CODING->charbuf after the first
          CARRYOVER elements, produce_chars writes the result to the
          destination and returns how many trailing elements it left
          unhandled, annotations (if any) are applied at POS, and the
          unhandled elements are moved to the head of charbuf for the
          next round.  */
7091   do
7092     {
7093       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7094
7095       coding_set_source (coding);
7096       coding->annotated = 0;
7097       coding->charbuf_used = carryover;
7098       (*(coding->decoder)) (coding);
7099       coding_set_destination (coding);
7100       carryover = produce_chars (coding, translation_table, 0);
7101       if (coding->annotated)
7102         produce_annotation (coding, pos);
7103       for (i = 0; i < carryover; i++)
7104         coding->charbuf[i]
7105           = coding->charbuf[coding->charbuf_used - carryover + i];
7106     }
7107   while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7108          || (coding->consumed < coding->src_bytes
7109              && (coding->result == CODING_RESULT_SUCCESS
7110                  || coding->result == CODING_RESULT_INVALID_SRC)));
7111
       /* Flush the elements still carried over from the last round.  The
          final argument is 1 here (0 inside the loop); presumably it
          marks the last call -- confirm against produce_chars.  */
7112   if (carryover > 0)
7113     {
7114       coding_set_destination (coding);
7115       coding->charbuf_used = carryover;
7116       produce_chars (coding, translation_table, 1);
7117     }
7118
       /* Deal with source bytes the decoder did not consume.  */
7119   coding->carryover_bytes = 0;
7120   if (coding->consumed < coding->src_bytes)
7121     {
7122       int nbytes = coding->src_bytes - coding->consumed;
7123       const unsigned char *src;
7124
7125       coding_set_source (coding);
7126       coding_set_destination (coding);
7127       src = coding->source + coding->consumed;
7128
7129       if (coding->mode & CODING_MODE_LAST_BLOCK)
7130         {
7131           /* Flush out unprocessed data as binary chars.  We are sure
7132              that the number of data is less than the size of
7133              coding->charbuf.  */
7134           coding->charbuf_used = 0;
7135           coding->chars_at_source = 0;
7136
7137           while (nbytes-- > 0)
7138             {
7139               int c = *src++;
7140
                   /* Bytes with the high bit set are converted with
                      BYTE8_TO_CHAR; other bytes are stored unchanged.  */
7141               if (c & 0x80)
7142                 c = BYTE8_TO_CHAR (c);
7143               coding->charbuf[coding->charbuf_used++] = c;
7144             }
                   /* No translation table is applied to these bytes.  */
7145           produce_chars (coding, Qnil, 1);
7146         }
7147       else
7148         {
7149           /* Record unprocessed bytes in coding->carryover.  We are
7150              sure that the number of data is less than the size of
7151              coding->carryover.  */
7152           unsigned char *p = coding->carryover;
7153
               /* Clamp anyway, so that the copy below cannot overrun
                  coding->carryover.  */
7154           if (nbytes > sizeof coding->carryover)
7155             nbytes = sizeof coding->carryover;
7156           coding->carryover_bytes = nbytes;
7157           while (nbytes-- > 0)
7158             *p++ = *src++;
7159         }
           /* Either way, the whole source now counts as consumed.  */
7160       coding->consumed = coding->src_bytes;
7161     }
7162
       /* Decode end-of-line sequences unless the EOL type of the coding
          system is Qunix or EOL conversion is inhibited.  */
7163   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7164       && !inhibit_eol_conversion)
7165     decode_eol (coding);
       /* Put back the undo list saved above and record the decoded text
          as one insertion.  */
7166   if (BUFFERP (coding->dst_object))
7167     {
7168       current_buffer->undo_list = undo_list;
7169       record_insert (coding->dst_pos, coding->produced_char);
7170     }
7171   return coding->result;
7172 }
7173
7174
7175 /* Extract an annotation datum from a composition starting at POS and
7176    ending before LIMIT of CODING->src_object (buffer or string), store
7177    the data in BUF, set *STOP to a starting position of the next
7178    composition (if any) or to LIMIT, and return the address of the
7179    next element of BUF.
7180
7181    If such an annotation is not found, set *STOP to a starting
7182    position of a composition after POS (if any) or to LIMIT, and
7183    return BUF.  */
7184
7185 static INLINE int *
7186 handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
7187                                struct coding_system *coding, int *buf,
7188                                EMACS_INT *stop)
7189 {
7190   EMACS_INT start, end;
7191   Lisp_Object prop;
7192
7193   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7194       || end > limit)
7195     *stop = limit;
7196   else if (start > pos)
7197     *stop = start;
7198   else
7199     {
7200       if (start == pos)
7201         {
7202           /* We found a composition.  Store the corresponding
7203              annotation data in BUF.  */
7204           int *head = buf;
7205           enum composition_method method = COMPOSITION_METHOD (prop);
7206           int nchars = COMPOSITION_LENGTH (prop);
7207
7208           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7209           if (method != COMPOSITION_RELATIVE)
7210             {
7211               Lisp_Object components;
7212               int len, i, i_byte;
7213
7214               components = COMPOSITION_COMPONENTS (prop);
7215               if (VECTORP (components))
7216                 {
7217                   len = XVECTOR (components)->size;
7218                   for (i = 0; i < len; i++)
7219                     *buf++ = XINT (AREF (components, i));
7220                 }
7221               else if (STRINGP (components))
7222                 {
7223                   len = SCHARS (components);
7224                   i = i_byte = 0;
7225                   while (i < len)
7226                     {
7227                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7228                       buf++;
7229                     }
7230                 }
7231               else if (INTEGERP (components))
7232                 {
7233                   len = 1;
7234                   *buf++ = XINT (components);
7235                 }
7236               else if (CONSP (components))
7237                 {
7238                   for (len = 0; CONSP (components);
7239                        len++, components = XCDR (components))
7240                     *buf++ = XINT (XCAR (components));
7241                 }
7242               else
7243                 abort ();
7244               *head -= len;
7245             }
7246         }
7247
7248       if (find_composition (end, limit, &start, &end, &prop,
7249                             coding->src_object)
7250           && end <= limit)
7251         *stop = start;
7252       else
7253         *stop = limit;
7254     }
7255   return buf;
7256 }
7257
7258
7259 /* Extract an annotation datum from a text property `charset' at POS of
7260    CODING->src_object (buffer of string), store the data in BUF, set
7261    *STOP to the position where the value of `charset' property changes
7262    (limiting by LIMIT), and return the address of the next element of
7263    BUF.
7264
7265    If the property value is nil, set *STOP to the position where the
7266    property value is non-nil (limiting by LIMIT), and return BUF.  */
7267
7268 static INLINE int *
7269 handle_charset_annotation (EMACS_INT pos, EMACS_INT limit,
7270                            struct coding_system *coding, int *buf,
7271                            EMACS_INT *stop)
7272 {
7273   Lisp_Object val, next;
7274   int id;
7275
7276   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7277   if (! NILP (val) && CHARSETP (val))
7278     id = XINT (CHARSET_SYMBOL_ID (val));
7279   else
7280     id = -1;
7281   ADD_CHARSET_DATA (buf, 0, id);
7282   next = Fnext_single_property_change (make_number (pos), Qcharset,
7283                                        coding->src_object,
7284                                        make_number (limit));
7285   *stop = XINT (next);
7286   return buf;
7287 }
7288
7289 /* Read characters from the source of CODING into CODING->charbuf,
        as input for the encoder.
     
        Reading starts at byte offset CODING->consumed of CODING->source
        and goes on until CODING->charbuf is nearly full or the source
        position reaches CODING->src_pos + CODING->src_chars.  On the way,
        a carriage return is turned into a newline when
        CODING_MODE_SELECTIVE_DISPLAY is set, newlines are converted
        according to the EOL type of the coding system, characters are
        translated through TRANSLATION_TABLE (if non-nil), and charset
        annotation data are stored where the `charset' text property
        changes.
     
        MAX_LOOKUP is the maximum number of characters handed to
        get_translation for a single lookup; it sizes a scratch array.
     
        On return, CODING->consumed, CODING->consumed_char and
        CODING->charbuf_used are updated and CODING->chars_at_source is 0.  */
7290 static void
7291 consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7292                int max_lookup)
7293 {
7294   int *buf = coding->charbuf;
7295   int *buf_end = coding->charbuf + coding->charbuf_size;
7296   const unsigned char *src = coding->source + coding->consumed;
7297   const unsigned char *src_end = coding->source + coding->src_bytes;
7298   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7299   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7300   int multibytep = coding->src_multibyte;
7301   Lisp_Object eol_type;
7302   int c;
7303   EMACS_INT stop, stop_composition, stop_charset;
7304   int *lookup_buf = NULL;
7305
       /* The scratch array for translation lookups is needed only when
          there is a translation table.  */
7306   if (! NILP (translation_table))
7307     lookup_buf = alloca (sizeof (int) * max_lookup);
7308
       /* A vector-valued EOL type is handled like Qunix, i.e. newlines
          are left alone (presumably a vector means the EOL format is not
          decided yet -- confirm).  */
7309   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7310   if (VECTORP (eol_type))
7311     eol_type = Qunix;
7312
7313   /* Note: composition handling is not yet implemented.  */
7314   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7315
       /* STOP is the next position at which an annotation handler has to
          run.  Without a source object, or with annotation turned off,
          it is the end of the source.  */
7316   if (NILP (coding->src_object))
7317     stop = stop_composition = stop_charset = end_pos;
7318   else
7319     {
7320       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7321         stop = stop_composition = pos;
7322       else
7323         stop = stop_composition = end_pos;
7324       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7325         stop = stop_charset = pos;
7326       else
7327         stop_charset = end_pos;
7328     }
7329
7330   /* Compensate for CRLF and conversion.  */
7331   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7332   while (buf < buf_end)
7333     {
7334       Lisp_Object trans;
7335
           /* At a stop position: finish if it is the end of the source,
              otherwise let the annotation handlers store their data and
              compute the next stop position.  */
7336       if (pos == stop)
7337         {
7338           if (pos == end_pos)
7339             break;
7340           if (pos == stop_composition)
7341             buf = handle_composition_annotation (pos, end_pos, coding,
7342                                                  buf, &stop_composition);
7343           if (pos == stop_charset)
7344             buf = handle_charset_annotation (pos, end_pos, coding,
7345                                              buf, &stop_charset);
7346           stop = (stop_composition < stop_charset
7347                   ? stop_composition : stop_charset);
7348         }
7349
           /* Fetch the next character C.  For a unibyte source, the
              raw-text and CCL encoders take each byte as is; for other
              encoders a byte sequence accepted by MULTIBYTE_LENGTH is
              read as one character (POS advancing by its byte length),
              and any other byte is converted with BYTE8_TO_CHAR.  */
7350       if (! multibytep)
7351         {
7352           EMACS_INT bytes;
7353
7354           if (coding->encoder == encode_coding_raw_text
7355               || coding->encoder == encode_coding_ccl)
7356             c = *src++, pos++;
7357           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7358             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7359           else
7360             c = BYTE8_TO_CHAR (*src), src++, pos++;
7361         }
7362       else
7363         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7364       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7365         c = '\n';
           /* EOL conversion: for Qdos a carriage return is stored in front
              of the newline; for any other non-unix type the newline is
              replaced by a carriage return.  */
7366       if (! EQ (eol_type, Qunix))
7367         {
7368           if (c == '\n')
7369             {
7370               if (EQ (eol_type, Qdos))
7371                 *buf++ = '\r';
7372               else
7373                 c = '\r';
7374             }
7375         }
7376
7377       trans = Qnil;
7378       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7379       if (NILP (trans))
7380         *buf++ = c;
7381       else
7382         {
7383           int from_nchars = 1, to_nchars = 1;
7384           int *lookup_buf_end;
7385           const unsigned char *p = src;
7386           int i;
7387
               /* The table has an entry for C.  Collect C plus up to
                  MAX_LOOKUP - 1 following source characters (read through
                  P, so SRC is not advanced yet) and let get_translation
                  select the translation.  */
7388           lookup_buf[0] = c;
7389           for (i = 1; i < max_lookup && p < src_end; i++)
7390             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7391           lookup_buf_end = lookup_buf + i;
7392           trans = get_translation (trans, lookup_buf, lookup_buf_end);
               /* An integer result replaces C alone.  A cons is taken as
                  (FROM . TO): the length of the vector FROM is the number
                  of source characters covered, and TO is either one
                  character or a vector of characters.  Anything else
                  ends the loop.  */
7393           if (INTEGERP (trans))
7394             c = XINT (trans);
7395           else if (CONSP (trans))
7396             {
7397               from_nchars = ASIZE (XCAR (trans));
7398               trans = XCDR (trans);
7399               if (INTEGERP (trans))
7400                 c = XINT (trans);
7401               else
7402                 {
7403                   to_nchars = ASIZE (trans);
                       /* Stop if the result does not fit in what is left
                          of the buffer.  */
7404                   if (buf + to_nchars > buf_end)
7405                     break;
7406                   c = XINT (AREF (trans, 0));
7407                 }
7408             }
7409           else
7410             break;
7411           *buf++ = c;
7412           for (i = 1; i < to_nchars; i++)
7413             *buf++ = XINT (AREF (trans, i));
               /* Skip the other FROM_NCHARS - 1 source characters covered
                  by the translation.  */
7414           for (i = 1; i < from_nchars; i++, pos++)
7415             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7416         }
7417     }
7418
       /* Publish the progress made.  */
7419   coding->consumed = src - coding->source;
7420   coding->consumed_char = pos - coding->src_pos;
7421   coding->charbuf_used = buf - coding->charbuf;
7422   coding->chars_at_source = 0;
7423 }
7424
7425
7426 /* Encode the text at CODING->src_object into CODING->dst_object.
7427    CODING->src_object is a buffer or a string.
7428    CODING->dst_object is a buffer or nil.
7429
7430    If CODING->src_object is a buffer, it must be the current buffer.
7431    In this case, if CODING->src_pos is positive, it is a position of
7432    the source text in the buffer, otherwise. the source text is in the
7433    gap area of the buffer, and coding->src_pos specifies the offset of
7434    the text from GPT (which must be the same as PT).  If this is the
7435    same buffer as CODING->dst_object, CODING->src_pos must be
7436    negative and CODING should not have `pre-write-conversion'.
7437
7438    If CODING->src_object is a string, CODING should not have
7439    `pre-write-conversion'.
7440
7441    If CODING->dst_object is a buffer, the encoded data is inserted at
7442    the current point of that buffer.
7443
7444    If CODING->dst_object is nil, the encoded data is placed at the
7445    memory area specified by CODING->destination.  */
7446
7447 static int
7448 encode_coding (struct coding_system *coding)
7449 {
7450   Lisp_Object attrs;
7451   Lisp_Object translation_table;
7452   int max_lookup;
7453   struct ccl_spec cclspec;
7454
7455   attrs = CODING_ID_ATTRS (coding->id);
7456   if (coding->encoder == encode_coding_raw_text)
7457     translation_table = Qnil, max_lookup = 0;
7458   else
7459     translation_table = get_translation_table (attrs, 1, &max_lookup);
7460
7461   if (BUFFERP (coding->dst_object))
7462     {
7463       set_buffer_internal (XBUFFER (coding->dst_object));
7464       coding->dst_multibyte
7465         = ! NILP (current_buffer->enable_multibyte_characters);
7466     }
7467
7468   coding->consumed = coding->consumed_char = 0;
7469   coding->produced = coding->produced_char = 0;
7470   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7471   coding->errors = 0;
7472
7473   ALLOC_CONVERSION_WORK_AREA (coding);
7474
7475   if (coding->encoder == encode_coding_ccl)
7476     {
7477       coding->spec.ccl = &cclspec;
7478       setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7479     }
7480   do {
7481     coding_set_source (coding);
7482     consume_chars (coding, translation_table, max_lookup);
7483     coding_set_destination (coding);
7484     (*(coding->encoder)) (coding);
7485   } while (coding->consumed_char < coding->src_chars);
7486
7487   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7488     insert_from_gap (coding->produced_char, coding->produced);
7489
7490   return (coding->result);
7491 }
7492
7493
7494 /* Name (or base name) of work buffer for code conversion.  */
7495 static Lisp_Object Vcode_conversion_workbuf_name;
7496
7497 /* A working buffer used by the top level conversion.  Once it is
7498    created, it is never destroyed.  It has the name
7499    Vcode_conversion_workbuf_name.  The other working buffers are
7500    destroyed after the use is finished, and their names are modified
7501    versions of Vcode_conversion_workbuf_name.  */
7502 static Lisp_Object Vcode_conversion_reused_workbuf;
7503
7504 /* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
        make_conversion_work_buffer increments it on every call, and
        code_conversion_restore resets it to 0 when the reused work
        buffer is released.  */
7505 static int reused_workbuf_in_use;
7506
7507
7508 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7509    multibyteness of returning buffer.  */
7510
7511 static Lisp_Object
7512 make_conversion_work_buffer (int multibyte)
7513 {
7514   Lisp_Object name, workbuf;
7515   struct buffer *current;
7516
7517   if (reused_workbuf_in_use++)
7518     {
7519       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7520       workbuf = Fget_buffer_create (name);
7521     }
7522   else
7523     {
7524       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7525         Vcode_conversion_reused_workbuf
7526           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7527       workbuf = Vcode_conversion_reused_workbuf;
7528     }
7529   current = current_buffer;
7530   set_buffer_internal (XBUFFER (workbuf));
7531   /* We can't allow modification hooks to run in the work buffer.  For
7532      instance, directory_files_internal assumes that file decoding
7533      doesn't compile new regexps.  */
7534   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7535   Ferase_buffer ();
7536   current_buffer->undo_list = Qt;
7537   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7538   set_buffer_internal (current);
7539   return workbuf;
7540 }
7541
7542
7543 static Lisp_Object
7544 code_conversion_restore (Lisp_Object arg)
7545 {
7546   Lisp_Object current, workbuf;
7547   struct gcpro gcpro1;
7548
7549   GCPRO1 (arg);
7550   current = XCAR (arg);
7551   workbuf = XCDR (arg);
7552   if (! NILP (workbuf))
7553     {
7554       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7555         reused_workbuf_in_use = 0;
7556       else if (! NILP (Fbuffer_live_p (workbuf)))
7557         Fkill_buffer (workbuf);
7558     }
7559   set_buffer_internal (XBUFFER (current));
7560   UNGCPRO;
7561   return Qnil;
7562 }
7563
7564 Lisp_Object
7565 code_conversion_save (int with_work_buf, int multibyte)
7566 {
7567   Lisp_Object workbuf = Qnil;
7568
7569   if (with_work_buf)
7570     workbuf = make_conversion_work_buffer (multibyte);
7571   record_unwind_protect (code_conversion_restore,
7572                          Fcons (Fcurrent_buffer (), workbuf));
7573   return workbuf;
7574 }
7575
7576 int
7577 decode_coding_gap (struct coding_system *coding,
7578                    EMACS_INT chars, EMACS_INT bytes)
7579 {
7580   int count = SPECPDL_INDEX ();
7581   Lisp_Object attrs;
7582
7583   code_conversion_save (0, 0);
7584
7585   coding->src_object = Fcurrent_buffer ();
7586   coding->src_chars = chars;
7587   coding->src_bytes = bytes;
7588   coding->src_pos = -chars;
7589   coding->src_pos_byte = -bytes;
7590   coding->src_multibyte = chars < bytes;
7591   coding->dst_object = coding->src_object;
7592   coding->dst_pos = PT;
7593   coding->dst_pos_byte = PT_BYTE;
7594   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7595
7596   if (CODING_REQUIRE_DETECTION (coding))
7597     detect_coding (coding);
7598
7599   coding->mode |= CODING_MODE_LAST_BLOCK;
7600   current_buffer->text->inhibit_shrinking = 1;
7601   decode_coding (coding);
7602   current_buffer->text->inhibit_shrinking = 0;
7603
7604   attrs = CODING_ID_ATTRS (coding->id);
7605   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7606     {
7607       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7608       Lisp_Object val;
7609
7610       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7611       val = call1 (CODING_ATTR_POST_READ (attrs),
7612                    make_number (coding->produced_char));
7613       CHECK_NATNUM (val);
7614       coding->produced_char += Z - prev_Z;
7615       coding->produced += Z_BYTE - prev_Z_BYTE;
7616     }
7617
7618   unbind_to (count, Qnil);
7619   return coding->result;
7620 }
7621
7622 int
7623 encode_coding_gap (struct coding_system *coding,
7624                    EMACS_INT chars, EMACS_INT bytes)
7625 {
7626   int count = SPECPDL_INDEX ();
7627
7628   code_conversion_save (0, 0);
7629
7630   coding->src_object = Fcurrent_buffer ();
7631   coding->src_chars = chars;
7632   coding->src_bytes = bytes;
7633   coding->src_pos = -chars;
7634   coding->src_pos_byte = -bytes;
7635   coding->src_multibyte = chars < bytes;
7636   coding->dst_object = coding->src_object;
7637   coding->dst_pos = PT;
7638   coding->dst_pos_byte = PT_BYTE;
7639
7640   encode_coding (coding);
7641
7642   unbind_to (count, Qnil);
7643   return coding->result;
7644 }
7645
7646
7647 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7648    SRC_OBJECT into DST_OBJECT by coding context CODING.
7649
7650    SRC_OBJECT is a buffer, a string, or Qnil.
7651
7652    If it is a buffer, the text is at point of the buffer.  FROM and TO
7653    are positions in the buffer.
7654
7655    If it is a string, the text is at the beginning of the string.
7656    FROM and TO are indices to the string.
7657
7658    If it is nil, the text is at coding->source.  FROM and TO are
7659    indices to coding->source.
7660
7661    DST_OBJECT is a buffer, Qt, or Qnil.
7662
7663    If it is a buffer, the decoded text is inserted at point of the
7664    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7665    is deleted.
7666
7667    If it is Qt, a string is made from the decoded text, and
7668    set in CODING->dst_object.
7669
7670    If it is Qnil, the decoded text is stored at CODING->destination.
7671    The caller must allocate CODING->dst_bytes bytes at
7672    CODING->destination by xmalloc.  If the decoded text is longer than
7673    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7674  */
7675
7676 void
7677 decode_coding_object (struct coding_system *coding,
7678                       Lisp_Object src_object,
7679                       EMACS_INT from, EMACS_INT from_byte,
7680                       EMACS_INT to, EMACS_INT to_byte,
7681                       Lisp_Object dst_object)
7682 {
7683   int count = SPECPDL_INDEX ();
7684   unsigned char *destination;
7685   EMACS_INT dst_bytes;
7686   EMACS_INT chars = to - from;
7687   EMACS_INT bytes = to_byte - from_byte;
7688   Lisp_Object attrs;
7689   int saved_pt = -1, saved_pt_byte;
7690   int need_marker_adjustment = 0;
7691   Lisp_Object old_deactivate_mark;
7692
7693   old_deactivate_mark = Vdeactivate_mark;
7694
7695   if (NILP (dst_object))
7696     {
7697       destination = coding->destination;
7698       dst_bytes = coding->dst_bytes;
7699     }
7700
7701   coding->src_object = src_object;
7702   coding->src_chars = chars;
7703   coding->src_bytes = bytes;
7704   coding->src_multibyte = chars < bytes;
7705
7706   if (STRINGP (src_object))
7707     {
7708       coding->src_pos = from;
7709       coding->src_pos_byte = from_byte;
7710     }
7711   else if (BUFFERP (src_object))
7712     {
7713       set_buffer_internal (XBUFFER (src_object));
7714       if (from != GPT)
7715         move_gap_both (from, from_byte);
7716       if (EQ (src_object, dst_object))
7717         {
7718           struct Lisp_Marker *tail;
7719
7720           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7721             {
7722               tail->need_adjustment
7723                 = tail->charpos == (tail->insertion_type ? from : to);
7724               need_marker_adjustment |= tail->need_adjustment;
7725             }
7726           saved_pt = PT, saved_pt_byte = PT_BYTE;
7727           TEMP_SET_PT_BOTH (from, from_byte);
7728           current_buffer->text->inhibit_shrinking = 1;
7729           del_range_both (from, from_byte, to, to_byte, 1);
7730           coding->src_pos = -chars;
7731           coding->src_pos_byte = -bytes;
7732         }
7733       else
7734         {
7735           coding->src_pos = from;
7736           coding->src_pos_byte = from_byte;
7737         }
7738     }
7739
7740   if (CODING_REQUIRE_DETECTION (coding))
7741     detect_coding (coding);
7742   attrs = CODING_ID_ATTRS (coding->id);
7743
7744   if (EQ (dst_object, Qt)
7745       || (! NILP (CODING_ATTR_POST_READ (attrs))
7746           && NILP (dst_object)))
7747     {
7748       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7749       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7750       coding->dst_pos = BEG;
7751       coding->dst_pos_byte = BEG_BYTE;
7752     }
7753   else if (BUFFERP (dst_object))
7754     {
7755       code_conversion_save (0, 0);
7756       coding->dst_object = dst_object;
7757       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7758       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7759       coding->dst_multibyte
7760         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7761     }
7762   else
7763     {
7764       code_conversion_save (0, 0);
7765       coding->dst_object = Qnil;
7766       /* Most callers presume this will return a multibyte result, and they
7767          won't use `binary' or `raw-text' anyway, so let's not worry about
7768          CODING_FOR_UNIBYTE.  */
7769       coding->dst_multibyte = 1;
7770     }
7771
7772   decode_coding (coding);
7773
7774   if (BUFFERP (coding->dst_object))
7775     set_buffer_internal (XBUFFER (coding->dst_object));
7776
7777   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7778     {
7779       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7780       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7781       Lisp_Object val;
7782
7783       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7784       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7785               old_deactivate_mark);
7786       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7787                         make_number (coding->produced_char));
7788       UNGCPRO;
7789       CHECK_NATNUM (val);
7790       coding->produced_char += Z - prev_Z;
7791       coding->produced += Z_BYTE - prev_Z_BYTE;
7792     }
7793
7794   if (EQ (dst_object, Qt))
7795     {
7796       coding->dst_object = Fbuffer_string ();
7797     }
7798   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7799     {
7800       set_buffer_internal (XBUFFER (coding->dst_object));
7801       if (dst_bytes < coding->produced)
7802         {
7803           destination = xrealloc (destination, coding->produced);
7804           if (! destination)
7805             {
7806               record_conversion_result (coding,
7807                                         CODING_RESULT_INSUFFICIENT_MEM);
7808               unbind_to (count, Qnil);
7809               return;
7810             }
7811           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7812             move_gap_both (BEGV, BEGV_BYTE);
7813           memcpy (destination, BEGV_ADDR, coding->produced);
7814           coding->destination = destination;
7815         }
7816     }
7817
7818   if (saved_pt >= 0)
7819     {
7820       /* This is the case of:
7821          (BUFFERP (src_object) && EQ (src_object, dst_object))
7822          As we have moved PT while replacing the original buffer
7823          contents, we must recover it now.  */
7824       set_buffer_internal (XBUFFER (src_object));
7825       current_buffer->text->inhibit_shrinking = 0;
7826       if (saved_pt < from)
7827         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7828       else if (saved_pt < from + chars)
7829         TEMP_SET_PT_BOTH (from, from_byte);
7830       else if (! NILP (current_buffer->enable_multibyte_characters))
7831         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7832                           saved_pt_byte + (coding->produced - bytes));
7833       else
7834         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7835                           saved_pt_byte + (coding->produced - bytes));
7836
7837       if (need_marker_adjustment)
7838         {
7839           struct Lisp_Marker *tail;
7840
7841           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7842             if (tail->need_adjustment)
7843               {
7844                 tail->need_adjustment = 0;
7845                 if (tail->insertion_type)
7846                   {
7847                     tail->bytepos = from_byte;
7848                     tail->charpos = from;
7849                   }
7850                 else
7851                   {
7852                     tail->bytepos = from_byte + coding->produced;
7853                     tail->charpos
7854                       = (NILP (current_buffer->enable_multibyte_characters)
7855                          ? tail->bytepos : from + coding->produced_char);
7856                   }
7857               }
7858         }
7859     }
7860
7861   Vdeactivate_mark = old_deactivate_mark;
7862   unbind_to (count, coding->dst_object);
7863 }
7864
7865
7866 void
7867 encode_coding_object (struct coding_system *coding,
7868                       Lisp_Object src_object,
7869                       EMACS_INT from, EMACS_INT from_byte,
7870                       EMACS_INT to, EMACS_INT to_byte,
7871                       Lisp_Object dst_object)
7872 {
7873   int count = SPECPDL_INDEX ();
7874   EMACS_INT chars = to - from;
7875   EMACS_INT bytes = to_byte - from_byte;
7876   Lisp_Object attrs;
7877   int saved_pt = -1, saved_pt_byte;
7878   int need_marker_adjustment = 0;
7879   int kill_src_buffer = 0;
7880   Lisp_Object old_deactivate_mark;
7881
7882   old_deactivate_mark = Vdeactivate_mark;
7883
7884   coding->src_object = src_object;
7885   coding->src_chars = chars;
7886   coding->src_bytes = bytes;
7887   coding->src_multibyte = chars < bytes;
7888
7889   attrs = CODING_ID_ATTRS (coding->id);
7890
7891   if (EQ (src_object, dst_object))
7892     {
7893       struct Lisp_Marker *tail;
7894
7895       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7896         {
7897           tail->need_adjustment
7898             = tail->charpos == (tail->insertion_type ? from : to);
7899           need_marker_adjustment |= tail->need_adjustment;
7900         }
7901     }
7902
7903   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7904     {
7905       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7906       set_buffer_internal (XBUFFER (coding->src_object));
7907       if (STRINGP (src_object))
7908         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7909       else if (BUFFERP (src_object))
7910         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7911       else
7912         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7913
7914       if (EQ (src_object, dst_object))
7915         {
7916           set_buffer_internal (XBUFFER (src_object));
7917           saved_pt = PT, saved_pt_byte = PT_BYTE;
7918           del_range_both (from, from_byte, to, to_byte, 1);
7919           set_buffer_internal (XBUFFER (coding->src_object));
7920         }
7921
7922       {
7923         Lisp_Object args[3];
7924         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7925
7926         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7927                 old_deactivate_mark);
7928         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7929         args[1] = make_number (BEG);
7930         args[2] = make_number (Z);
7931         safe_call (3, args);
7932         UNGCPRO;
7933       }
7934       if (XBUFFER (coding->src_object) != current_buffer)
7935         kill_src_buffer = 1;
7936       coding->src_object = Fcurrent_buffer ();
7937       if (BEG != GPT)
7938         move_gap_both (BEG, BEG_BYTE);
7939       coding->src_chars = Z - BEG;
7940       coding->src_bytes = Z_BYTE - BEG_BYTE;
7941       coding->src_pos = BEG;
7942       coding->src_pos_byte = BEG_BYTE;
7943       coding->src_multibyte = Z < Z_BYTE;
7944     }
7945   else if (STRINGP (src_object))
7946     {
7947       code_conversion_save (0, 0);
7948       coding->src_pos = from;
7949       coding->src_pos_byte = from_byte;
7950     }
7951   else if (BUFFERP (src_object))
7952     {
7953       code_conversion_save (0, 0);
7954       set_buffer_internal (XBUFFER (src_object));
7955       if (EQ (src_object, dst_object))
7956         {
7957           saved_pt = PT, saved_pt_byte = PT_BYTE;
7958           coding->src_object = del_range_1 (from, to, 1, 1);
7959           coding->src_pos = 0;
7960           coding->src_pos_byte = 0;
7961         }
7962       else
7963         {
7964           if (from < GPT && to >= GPT)
7965             move_gap_both (from, from_byte);
7966           coding->src_pos = from;
7967           coding->src_pos_byte = from_byte;
7968         }
7969     }
7970   else
7971     code_conversion_save (0, 0);
7972
7973   if (BUFFERP (dst_object))
7974     {
7975       coding->dst_object = dst_object;
7976       if (EQ (src_object, dst_object))
7977         {
7978           coding->dst_pos = from;
7979           coding->dst_pos_byte = from_byte;
7980         }
7981       else
7982         {
7983           struct buffer *current = current_buffer;
7984
7985           set_buffer_temp (XBUFFER (dst_object));
7986           coding->dst_pos = PT;
7987           coding->dst_pos_byte = PT_BYTE;
7988           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7989           set_buffer_temp (current);
7990         }
7991       coding->dst_multibyte
7992         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7993     }
7994   else if (EQ (dst_object, Qt))
7995     {
7996       coding->dst_object = Qnil;
7997       coding->dst_bytes = coding->src_chars;
7998       if (coding->dst_bytes == 0)
7999         coding->dst_bytes = 1;
8000       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
8001       coding->dst_multibyte = 0;
8002     }
8003   else
8004     {
8005       coding->dst_object = Qnil;
8006       coding->dst_multibyte = 0;
8007     }
8008
8009   encode_coding (coding);
8010
8011   if (EQ (dst_object, Qt))
8012     {
8013       if (BUFFERP (coding->dst_object))
8014         coding->dst_object = Fbuffer_string ();
8015       else
8016         {
8017           coding->dst_object
8018             = make_unibyte_string ((char *) coding->destination,
8019                                    coding->produced);
8020           xfree (coding->destination);
8021         }
8022     }
8023
8024   if (saved_pt >= 0)
8025     {
8026       /* This is the case of:
8027          (BUFFERP (src_object) && EQ (src_object, dst_object))
8028          As we have moved PT while replacing the original buffer
8029          contents, we must recover it now.  */
8030       set_buffer_internal (XBUFFER (src_object));
8031       if (saved_pt < from)
8032         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8033       else if (saved_pt < from + chars)
8034         TEMP_SET_PT_BOTH (from, from_byte);
8035       else if (! NILP (current_buffer->enable_multibyte_characters))
8036         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8037                           saved_pt_byte + (coding->produced - bytes));
8038       else
8039         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8040                           saved_pt_byte + (coding->produced - bytes));
8041
8042       if (need_marker_adjustment)
8043         {
8044           struct Lisp_Marker *tail;
8045
8046           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8047             if (tail->need_adjustment)
8048               {
8049                 tail->need_adjustment = 0;
8050                 if (tail->insertion_type)
8051                   {
8052                     tail->bytepos = from_byte;
8053                     tail->charpos = from;
8054                   }
8055                 else
8056                   {
8057                     tail->bytepos = from_byte + coding->produced;
8058                     tail->charpos
8059                       = (NILP (current_buffer->enable_multibyte_characters)
8060                          ? tail->bytepos : from + coding->produced_char);
8061                   }
8062               }
8063         }
8064     }
8065
8066   if (kill_src_buffer)
8067     Fkill_buffer (coding->src_object);
8068
8069   Vdeactivate_mark = old_deactivate_mark;
8070   unbind_to (count, Qnil);
8071 }
8072
8073
8074 Lisp_Object
8075 preferred_coding_system (void)
8076 {
8077   int id = coding_categories[coding_priorities[0]].id;
8078
8079   return CODING_ID_NAME (id);
8080 }
8081
8082 \f
8083 #ifdef emacs
8084 /*** 8. Emacs Lisp library functions ***/
8085
8086 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8087        doc: /* Return t if OBJECT is nil or a coding-system.
8088 See the documentation of `define-coding-system' for information
8089 about coding-system objects.  */)
8090   (Lisp_Object object)
8091 {
8092   if (NILP (object)
8093       || CODING_SYSTEM_ID (object) >= 0)
8094     return Qt;
8095   if (! SYMBOLP (object)
8096       || NILP (Fget (object, Qcoding_system_define_form)))
8097     return Qnil;
8098   return Qt;
8099 }
8100
8101 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8102        Sread_non_nil_coding_system, 1, 1, 0,
8103        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8104   (Lisp_Object prompt)
8105 {
8106   Lisp_Object val;
8107   do
8108     {
8109       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8110                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8111     }
8112   while (SCHARS (val) == 0);
8113   return (Fintern (val, Qnil));
8114 }
8115
8116 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8117        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8118 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8119 Ignores case when completing coding systems (all Emacs coding systems
8120 are lower-case).  */)
8121   (Lisp_Object prompt, Lisp_Object default_coding_system)
8122 {
8123   Lisp_Object val;
8124   int count = SPECPDL_INDEX ();
8125
8126   if (SYMBOLP (default_coding_system))
8127     default_coding_system = SYMBOL_NAME (default_coding_system);
8128   specbind (Qcompletion_ignore_case, Qt);
8129   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8130                           Qt, Qnil, Qcoding_system_history,
8131                           default_coding_system, Qnil);
8132   unbind_to (count, Qnil);
8133   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8134 }
8135
8136 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8137        1, 1, 0,
8138        doc: /* Check validity of CODING-SYSTEM.
8139 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8140 It is valid if it is nil or a symbol defined as a coding system by the
8141 function `define-coding-system'.  */)
8142   (Lisp_Object coding_system)
8143 {
8144   Lisp_Object define_form;
8145
8146   define_form = Fget (coding_system, Qcoding_system_define_form);
8147   if (! NILP (define_form))
8148     {
8149       Fput (coding_system, Qcoding_system_define_form, Qnil);
8150       safe_eval (define_form);
8151     }
8152   if (!NILP (Fcoding_system_p (coding_system)))
8153     return coding_system;
8154   xsignal1 (Qcoding_system_error, coding_system);
8155 }
8156
8157 \f
/* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
   HIGHEST is nonzero, return the coding system of the highest
   priority among the detected coding systems.  Otherwise return a
   list of detected coding systems sorted by their priorities.  If
   MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
   multibyte form but contain only ASCII and eight-bit chars.
   Otherwise, the bytes are raw bytes.

   CODING-SYSTEM controls the detection as below:

   If it is nil, detect both text-format and eol-format.  If the
   text-format part of CODING-SYSTEM is already specified
   (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
   part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
   detect only text-format.

   When HIGHEST is nonzero and nothing was detected, the value is
   nil.  */

Lisp_Object
detect_coding_system (const unsigned char *src,
                      EMACS_INT src_chars, EMACS_INT src_bytes,
                      int highest, int multibytep,
                      Lisp_Object coding_system)
{
  const unsigned char *src_end = src + src_bytes;
  Lisp_Object attrs, eol_type;
  /* While detecting, VAL is a list of coding system IDs (fixnums);
     the eol pass at the end replaces each ID by a coding system
     name.  */
  Lisp_Object val = Qnil;
  struct coding_system coding;
  int id;
  struct coding_detection_info detect_info;
  enum coding_category base_category;
  int null_byte_found = 0, eight_bit_found = 0;

  /* A nil CODING_SYSTEM is treated as `undecided'.  */
  if (NILP (coding_system))
    coding_system = Qundecided;
  setup_coding_system (coding_system, &coding);
  attrs = CODING_ID_ATTRS (coding.id);
  eol_type = CODING_ID_EOL_TYPE (coding.id);
  /* From here on CODING_SYSTEM holds the base name recorded in the
     attributes.  */
  coding_system = CODING_ATTR_BASE_NAME (attrs);

  coding.source = src;
  coding.src_chars = src_chars;
  coding.src_bytes = src_bytes;
  coding.src_multibyte = multibytep;
  coding.consumed = 0;
  coding.mode |= CODING_MODE_LAST_BLOCK;
  coding.head_ascii = 0;

  detect_info.checked = detect_info.found = detect_info.rejected = 0;

  /* At first, detect text-format if necessary.  */
  base_category = XINT (CODING_ATTR_CATEGORY (attrs));
  if (base_category == coding_category_undecided)
    {
      enum coding_category category;
      struct coding_system *this;
      int c, i;

      /* Skip all ASCII bytes except for a few ISO2022 controls.
         coding.head_ascii counts the bytes passed over before any
         eight-bit byte is seen.  The scan stops early once both an
         eight-bit byte and a null byte have been seen.  */
      for (; src < src_end; src++)
        {
          c = *src;
          if (c & 0x80)
            {
              eight_bit_found = 1;
              if (null_byte_found)
                break;
            }
          else if (c < 0x20)
            {
              /* The ISO-2022 detector is run at most once: only while
                 detect_info.checked is still zero.  */
              if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
                  && ! inhibit_iso_escape_detection
                  && ! detect_info.checked)
                {
                  if (detect_coding_iso_2022 (&coding, &detect_info))
                    {
                      /* We have scanned the whole data.  */
                      if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
                        {
                          /* We didn't find an 8-bit code.  We may
                             have found a null-byte, but it's very
                             rare that a binary file conforms to
                             ISO-2022.  */
                          src = src_end;
                          coding.head_ascii = src - coding.source;
                        }
                      /* Reject every category outside
                         CATEGORY_MASK_ISO_ESCAPE.  */
                      detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
                      break;
                    }
                }
              else if (! c && !inhibit_null_byte_detection)
                {
                  null_byte_found = 1;
                  if (eight_bit_found)
                    break;
                }
              if (! eight_bit_found)
                coding.head_ascii++;
            }
          else if (! eight_bit_found)
            coding.head_ascii++;
        }

      /* Consult the per-category detectors only if the scan saw
         something besides plain 7-bit text, stopped early, or a
         category has already been reported as found.  */
      if (null_byte_found || eight_bit_found
          || coding.head_ascii < coding.src_bytes
          || detect_info.found)
        {
          if (coding.head_ascii == coding.src_bytes)
            /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
            for (i = 0; i < coding_category_raw_text; i++)
              {
                /* Stop at the first category, in priority order, that
                   is already marked as found; CATEGORY and THIS are
                   left pointing at it.  */
                category = coding_priorities[i];
                this = coding_categories + category;
                if (detect_info.found & (1 << category))
                  break;
              }
          else
            {
              if (null_byte_found)
                {
                  /* Mark every category outside CATEGORY_MASK_UTF_16
                     as both checked and rejected, so the loop below
                     runs detectors only for the UTF-16 categories.  */
                  detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                  detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                }
              /* Walk the categories in priority order, running the
                 detector of each one not yet checked.  With HIGHEST
                 set, stop at the first category that is found.  */
              for (i = 0; i < coding_category_raw_text; i++)
                {
                  category = coding_priorities[i];
                  this = coding_categories + category;

                  if (this->id < 0)
                    {
                      /* No coding system of this category is defined.  */
                      detect_info.rejected |= (1 << category);
                    }
                  else if (category >= coding_category_raw_text)
                    continue;
                  else if (detect_info.checked & (1 << category))
                    {
                      if (highest
                          && (detect_info.found & (1 << category)))
                        break;
                    }
                  else if ((*(this->detector)) (&coding, &detect_info)
                           && highest
                           && (detect_info.found & (1 << category)))
                    {
                      /* For the auto UTF-16 category, narrow CATEGORY
                         to the little- or big-endian variant.  */
                      if (category == coding_category_utf_16_auto)
                        {
                          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
                            category = coding_category_utf_16_le;
                          else
                            category = coding_category_utf_16_be;
                        }
                      break;
                    }
                }
            }
        }

      /* Turn the detection masks into VAL, a list of coding system
         IDs.  */
      if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
          || null_byte_found)
        {
          /* Everything was rejected, or a null byte was seen: report
             `no-conversion'.  */
          detect_info.found = CATEGORY_MASK_RAW_TEXT;
          id = CODING_SYSTEM_ID (Qno_conversion);
          val = Fcons (make_number (id), Qnil);
        }
      else if (! detect_info.rejected && ! detect_info.found)
        {
          /* Nothing was found and nothing was rejected: report the
             coding system of the undecided category.  */
          detect_info.found = CATEGORY_MASK_ANY;
          id = coding_categories[coding_category_undecided].id;
          val = Fcons (make_number (id), Qnil);
        }
      else if (highest)
        {
          if (detect_info.found)
            {
              /* CATEGORY and THIS were left by the loops above.  */
              detect_info.found = 1 << category;
              val = Fcons (make_number (this->id), Qnil);
            }
          else
            /* Nothing found: take the highest-priority category that
               was not rejected.  */
            for (i = 0; i < coding_category_raw_text; i++)
              if (! (detect_info.rejected & (1 << coding_priorities[i])))
                {
                  detect_info.found = 1 << coding_priorities[i];
                  id = coding_categories[coding_priorities[i]].id;
                  val = Fcons (make_number (id), Qnil);
                  break;
                }
        }
      else
        {
          int mask = detect_info.rejected | detect_info.found;
          int found = 0;

          /* Both loops run from the lowest priority upward so that
             consing onto the front of VAL leaves it in priority
             order.  First the categories that were neither rejected
             nor found ...  */
          for (i = coding_category_raw_text - 1; i >= 0; i--)
            {
              category = coding_priorities[i];
              if (! (mask & (1 << category)))
                {
                  found |= 1 << category;
                  id = coding_categories[category].id;
                  if (id >= 0)
                    val = Fcons (make_number (id), val);
                }
            }
          /* ... then the found categories, which end up in front of
             them.  */
          for (i = coding_category_raw_text - 1; i >= 0; i--)
            {
              category = coding_priorities[i];
              if (detect_info.found & (1 << category))
                {
                  id = coding_categories[category].id;
                  val = Fcons (make_number (id), val);
                }
            }
          detect_info.found |= found;
        }
    }
  else if (base_category == coding_category_utf_8_auto)
    {
      /* Only decide between UTF-8 with and without signature.  */
      if (detect_coding_utf_8 (&coding, &detect_info))
        {
          struct coding_system *this;

          if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
            this = coding_categories + coding_category_utf_8_sig;
          else
            this = coding_categories + coding_category_utf_8_nosig;
          val = Fcons (make_number (this->id), Qnil);
        }
    }
  else if (base_category == coding_category_utf_16_auto)
    {
      /* Only decide among the UTF-16 variants.  */
      if (detect_coding_utf_16 (&coding, &detect_info))
        {
          struct coding_system *this;

          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
            this = coding_categories + coding_category_utf_16_le;
          else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
            this = coding_categories + coding_category_utf_16_be;
          else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
            this = coding_categories + coding_category_utf_16_be_nosig;
          else
            this = coding_categories + coding_category_utf_16_le_nosig;
          val = Fcons (make_number (this->id), Qnil);
        }
    }
  else
    {
      /* The text-format is already specified; report it as is.  */
      detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
      val = Fcons (make_number (coding.id), Qnil);
    }

  /* Then, detect eol-format if necessary.  */
  {
    /* -1 means the eol format was not determined for that group.  */
    int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
    Lisp_Object tail;

    if (VECTORP (eol_type))
      {
        /* The requested coding system leaves the eol-format open, so
           detect it separately for non-UTF-16 text and for each
           UTF-16 byte order that was found.  */
        if (detect_info.found & ~CATEGORY_MASK_UTF_16)
          {
            if (null_byte_found)
              normal_eol = EOL_SEEN_LF;
            else
              normal_eol = detect_eol (coding.source, src_bytes,
                                       coding_category_raw_text);
          }
        if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
                                 | CATEGORY_MASK_UTF_16_BE_NOSIG))
          utf_16_be_eol = detect_eol (coding.source, src_bytes,
                                      coding_category_utf_16_be);
        if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
                                 | CATEGORY_MASK_UTF_16_LE_NOSIG))
          utf_16_le_eol = detect_eol (coding.source, src_bytes,
                                      coding_category_utf_16_le);
      }
    else
      {
        /* The eol-format is already specified; use it for all
           groups.  */
        if (EQ (eol_type, Qunix))
          normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
        else if (EQ (eol_type, Qdos))
          normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
        else
          normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
      }

    /* Replace each coding system ID in VAL, in place, by a coding
       system name.  When the candidate's own eol type is a vector,
       the element matching the detected eol format is used
       (presumably the eol-specific subsidiary coding system --
       confirm against the eol-type vector layout); otherwise, or when
       no eol format was detected, the plain name is used.  */
    for (tail = val; CONSP (tail); tail = XCDR (tail))
      {
        enum coding_category category;
        int this_eol;

        id = XINT (XCAR (tail));
        attrs = CODING_ID_ATTRS (id);
        category = XINT (CODING_ATTR_CATEGORY (attrs));
        eol_type = CODING_ID_EOL_TYPE (id);
        if (VECTORP (eol_type))
          {
            if (category == coding_category_utf_16_be
                || category == coding_category_utf_16_be_nosig)
              this_eol = utf_16_be_eol;
            else if (category == coding_category_utf_16_le
                     || category == coding_category_utf_16_le_nosig)
              this_eol = utf_16_le_eol;
            else
              this_eol = normal_eol;

            if (this_eol == EOL_SEEN_LF)
              XSETCAR (tail, AREF (eol_type, 0));
            else if (this_eol == EOL_SEEN_CRLF)
              XSETCAR (tail, AREF (eol_type, 1));
            else if (this_eol == EOL_SEEN_CR)
              XSETCAR (tail, AREF (eol_type, 2));
            else
              XSETCAR (tail, CODING_ID_NAME (id));
          }
        else
          XSETCAR (tail, CODING_ID_NAME (id));
      }
  }

  /* With HIGHEST, return just the first element (or nil if VAL is
     empty); otherwise return the whole list.  */
  return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
}
8478
8479
8480 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8481        2, 3, 0,
8482        doc: /* Detect coding system of the text in the region between START and END.
8483 Return a list of possible coding systems ordered by priority.
8484 The coding systems to try and their priorities follows what
8485 the function `coding-system-priority-list' (which see) returns.
8486
8487 If only ASCII characters are found (except for such ISO-2022 control
8488 characters as ESC), it returns a list of single element `undecided'
8489 or its subsidiary coding system according to a detected end-of-line
8490 format.
8491
8492 If optional argument HIGHEST is non-nil, return the coding system of
8493 highest priority.  */)
8494   (Lisp_Object start, Lisp_Object end, Lisp_Object highest)
8495 {
8496   int from, to;
8497   int from_byte, to_byte;
8498
8499   CHECK_NUMBER_COERCE_MARKER (start);
8500   CHECK_NUMBER_COERCE_MARKER (end);
8501
8502   validate_region (&start, &end);
8503   from = XINT (start), to = XINT (end);
8504   from_byte = CHAR_TO_BYTE (from);
8505   to_byte = CHAR_TO_BYTE (to);
8506
8507   if (from < GPT && to >= GPT)
8508     move_gap_both (to, to_byte);
8509
8510   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8511                                to - from, to_byte - from_byte,
8512                                !NILP (highest),
8513                                !NILP (current_buffer
8514                                       ->enable_multibyte_characters),
8515                                Qnil);
8516 }
8517
8518 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8519        1, 2, 0,
8520        doc: /* Detect coding system of the text in STRING.
8521 Return a list of possible coding systems ordered by priority.
8522 The coding systems to try and their priorities follows what
8523 the function `coding-system-priority-list' (which see) returns.
8524
8525 If only ASCII characters are found (except for such ISO-2022 control
8526 characters as ESC), it returns a list of single element `undecided'
8527 or its subsidiary coding system according to a detected end-of-line
8528 format.
8529
8530 If optional argument HIGHEST is non-nil, return the coding system of
8531 highest priority.  */)
8532   (Lisp_Object string, Lisp_Object highest)
8533 {
8534   CHECK_STRING (string);
8535
8536   return detect_coding_system (SDATA (string),
8537                                SCHARS (string), SBYTES (string),
8538                                !NILP (highest), STRING_MULTIBYTE (string),
8539                                Qnil);
8540 }
8541
8542
8543 static INLINE int
8544 char_encodable_p (int c, Lisp_Object attrs)
8545 {
8546   Lisp_Object tail;
8547   struct charset *charset;
8548   Lisp_Object translation_table;
8549
8550   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8551   if (! NILP (translation_table))
8552     c = translate_char (translation_table, c);
8553   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8554        CONSP (tail); tail = XCDR (tail))
8555     {
8556       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8557       if (CHAR_CHARSET_P (c, charset))
8558         break;
8559     }
8560   return (! NILP (tail));
8561 }
8562
8563
8564 /* Return a list of coding systems that safely encode the text between
8565    START and END.  If EXCLUDE is non-nil, it is a list of coding
8566    systems not to check.  The returned list doesn't contain any such
8567    coding systems.  In any case, if the text contains only ASCII or is
8568    unibyte, return t.  */
8569
DEFUN ("find-coding-systems-region-internal",
       Ffind_coding_systems_region_internal,
       Sfind_coding_systems_region_internal, 2, 3, 0,
       doc: /* Internal use only.  */)
  (Lisp_Object start, Lisp_Object end, Lisp_Object exclude)
{
  Lisp_Object coding_attrs_list, safe_codings;
  EMACS_INT start_byte, end_byte;
  const unsigned char *p, *pbeg, *pend;
  int c;
  Lisp_Object tail, elt, work_table;

  if (STRINGP (start))
    {
      /* START is the text itself; END is not consulted.  A unibyte
         string, or one whose character and byte counts agree
         (i.e. pure ASCII), needs no further checking.  */
      if (!STRING_MULTIBYTE (start)
          || SCHARS (start) == SBYTES (start))
        return Qt;
      start_byte = 0;
      end_byte = SBYTES (start);
    }
  else
    {
      CHECK_NUMBER_COERCE_MARKER (start);
      CHECK_NUMBER_COERCE_MARKER (end);
      if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
        args_out_of_range (start, end);
      /* Same shortcuts as above for a unibyte buffer or a region
         whose character and byte lengths agree.  */
      if (NILP (current_buffer->enable_multibyte_characters))
        return Qt;
      start_byte = CHAR_TO_BYTE (XINT (start));
      end_byte = CHAR_TO_BYTE (XINT (end));
      if (XINT (end) - XINT (start) == end_byte - start_byte)
        return Qt;

      /* The scan below walks one flat run of bytes, so if the gap is
         strictly inside the region move it to whichever end of the
         region is nearer.  */
      if (XINT (start) < GPT && XINT (end) > GPT)
        {
          if ((GPT - XINT (start)) < (XINT (end) - GPT))
            move_gap_both (XINT (start), start_byte);
          else
            move_gap_both (XINT (end), end_byte);
        }
    }

  /* Collect the attribute vectors of the candidate coding systems:
     every member of Vcoding_system_list that is not in EXCLUDE, is
     its own base coding system, and is not of type `undecided'.  The
     translation table returned by get_translation_table is stored
     into each attribute vector so that char_encodable_p finds it
     there.  */
  coding_attrs_list = Qnil;
  for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
    if (NILP (exclude)
        || NILP (Fmemq (XCAR (tail), exclude)))
      {
        Lisp_Object attrs;

        attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
        if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
            && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
          {
            ASET (attrs, coding_attr_trans_tbl,
                  get_translation_table (attrs, 1, NULL));
            coding_attrs_list = Fcons (attrs, coding_attrs_list);
          }
      }

  if (STRINGP (start))
    p = pbeg = SDATA (start);
  else
    p = pbeg = BYTE_POS_ADDR (start_byte);
  pend = p + (end_byte - start_byte);

  /* Trim ASCII bytes from both ends of the range.  */
  while (p < pend && ASCII_BYTE_P (*p)) p++;
  while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;

  /* WORK_TABLE records the characters already tested, so each
     distinct character is checked against the candidates once.  */
  work_table = Fmake_char_table (Qnil, Qnil);
  while (p < pend)
    {
      if (ASCII_BYTE_P (*p))
        p++;
      else
        {
          c = STRING_CHAR_ADVANCE (p);
          if (!NILP (char_table_ref (work_table, c)))
            /* This character was already checked.  Ignore it.  */
            continue;

          charset_map_loaded = 0;
          /* Drop from coding_attrs_list every candidate that cannot
             encode C.  An element is removed in place by copying the
             next cell over it (TAIL is then re-examined, not
             advanced); the last cell cannot be removed that way, so
             its car is set to nil instead.  */
          for (tail = coding_attrs_list; CONSP (tail);)
            {
              elt = XCAR (tail);
              if (NILP (elt))
                tail = XCDR (tail);
              else if (char_encodable_p (c, elt))
                tail = XCDR (tail);
              else if (CONSP (XCDR (tail)))
                {
                  XSETCAR (tail, XCAR (XCDR (tail)));
                  XSETCDR (tail, XCDR (XCDR (tail)));
                }
              else
                {
                  XSETCAR (tail, Qnil);
                  tail = XCDR (tail);
                }
            }
          /* If charset_map_loaded became nonzero during the checks,
             re-derive P and PEND from their offsets -- presumably
             because loading a charset map may relocate the string or
             buffer text; confirm against the charset loading code.  */
          if (charset_map_loaded)
            {
              EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;

              if (STRINGP (start))
                pbeg = SDATA (start);
              else
                pbeg = BYTE_POS_ADDR (start_byte);
              p = pbeg + p_offset;
              pend = pbeg + pend_offset;
            }
          char_table_set (work_table, c, Qt);
        }
    }

  /* The result always ends with `raw-text' and `no-conversion'; the
     base names of the surviving candidates are consed in front.  */
  safe_codings = list2 (Qraw_text, Qno_conversion);
  for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
    if (! NILP (XCAR (tail)))
      safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);

  return safe_codings;
}
8691
8692
8693 DEFUN ("unencodable-char-position", Funencodable_char_position,
8694        Sunencodable_char_position, 3, 5, 0,
8695        doc: /*
8696 Return position of first un-encodable character in a region.
8697 START and END specify the region and CODING-SYSTEM specifies the
8698 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8699
8700 If optional 4th argument COUNT is non-nil, it specifies at most how
8701 many un-encodable characters to search.  In this case, the value is a
8702 list of positions.
8703
8704 If optional 5th argument STRING is non-nil, it is a string to search
8705 for un-encodable characters.  In that case, START and END are indexes
8706 to the string.  */)
8707   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
8708 {
8709   int n;
8710   struct coding_system coding;
8711   Lisp_Object attrs, charset_list, translation_table;
8712   Lisp_Object positions;
8713   int from, to;
8714   const unsigned char *p, *stop, *pend;
8715   int ascii_compatible;
8716
8717   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8718   attrs = CODING_ID_ATTRS (coding.id);
8719   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8720     return Qnil;
8721   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8722   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8723   translation_table = get_translation_table (attrs, 1, NULL);
8724
8725   if (NILP (string))
8726     {
8727       validate_region (&start, &end);
8728       from = XINT (start);
8729       to = XINT (end);
8730       if (NILP (current_buffer->enable_multibyte_characters)
8731           || (ascii_compatible
8732               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8733         return Qnil;
8734       p = CHAR_POS_ADDR (from);
8735       pend = CHAR_POS_ADDR (to);
8736       if (from < GPT && to >= GPT)
8737         stop = GPT_ADDR;
8738       else
8739         stop = pend;
8740     }
8741   else
8742     {
8743       CHECK_STRING (string);
8744       CHECK_NATNUM (start);
8745       CHECK_NATNUM (end);
8746       from = XINT (start);
8747       to = XINT (end);
8748       if (from > to
8749           || to > SCHARS (string))
8750         args_out_of_range_3 (string, start, end);
8751       if (! STRING_MULTIBYTE (string))
8752         return Qnil;
8753       p = SDATA (string) + string_char_to_byte (string, from);
8754       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8755       if (ascii_compatible && (to - from) == (pend - p))
8756         return Qnil;
8757     }
8758
8759   if (NILP (count))
8760     n = 1;
8761   else
8762     {
8763       CHECK_NATNUM (count);
8764       n = XINT (count);
8765     }
8766
8767   positions = Qnil;
8768   while (1)
8769     {
8770       int c;
8771
8772       if (ascii_compatible)
8773         while (p < stop && ASCII_BYTE_P (*p))
8774           p++, from++;
8775       if (p >= stop)
8776         {
8777           if (p >= pend)
8778             break;
8779           stop = pend;
8780           p = GAP_END_ADDR;
8781         }
8782
8783       c = STRING_CHAR_ADVANCE (p);
8784       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8785           && ! char_charset (translate_char (translation_table, c),
8786                              charset_list, NULL))
8787         {
8788           positions = Fcons (make_number (from), positions);
8789           n--;
8790           if (n == 0)
8791             break;
8792         }
8793
8794       from++;
8795     }
8796
8797   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8798 }
8799
8800
DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
       Scheck_coding_systems_region, 3, 3, 0,
       doc: /* Check if the region is encodable by coding systems.

START and END are buffer positions specifying the region.
CODING-SYSTEM-LIST is a list of coding systems to check.

The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
whole region, POS0, POS1, ... are buffer positions where non-encodable
characters are found.

If all coding systems in CODING-SYSTEM-LIST can encode the region, the
value is nil.

START may be a string.  In that case, check if the string is
encodable, and the value contains indices to the string instead of
buffer positions.  END is ignored.

If the current buffer (or START if it is a string) is unibyte, the value
is nil.  */)
  (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system_list)
{
  Lisp_Object list;
  EMACS_INT start_byte, end_byte;
  int pos;
  const unsigned char *p, *pbeg, *pend;
  int c;
  Lisp_Object tail, elt, attrs;

  if (STRINGP (start))
    {
      /* Nothing to report for a unibyte string, or for one whose
         character count equals its byte count.  */
      if (!STRING_MULTIBYTE (start)
          || SCHARS (start) == SBYTES (start))
        return Qnil;
      start_byte = 0;
      end_byte = SBYTES (start);
      pos = 0;
    }
  else
    {
      CHECK_NUMBER_COERCE_MARKER (start);
      CHECK_NUMBER_COERCE_MARKER (end);
      if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
        args_out_of_range (start, end);
      if (NILP (current_buffer->enable_multibyte_characters))
        return Qnil;
      start_byte = CHAR_TO_BYTE (XINT (start));
      end_byte = CHAR_TO_BYTE (XINT (end));
      /* Same shortcut as for strings: as many bytes as characters.  */
      if (XINT (end) - XINT (start) == end_byte - start_byte)
        return Qnil;

      /* When the gap lies strictly inside the region, move it to the
         nearer edge of the region; presumably this is what lets the
         scan below treat the region as one contiguous byte range.  */
      if (XINT (start) < GPT && XINT (end) > GPT)
        {
          if ((GPT - XINT (start)) < (XINT (end) - GPT))
            move_gap_both (XINT (start), start_byte);
          else
            move_gap_both (XINT (end), end_byte);
        }
      pos = XINT (start);
    }

  /* Build a work list whose elements are (CODING-SYSTEM ATTRS); the
     positions of unencodable characters are later pushed right after
     ATTRS.  Each ATTRS vector also gets its encode translation table
     stored in the coding_attr_trans_tbl slot.
     NOTE(review): ELT is used without validation here; confirm that
     CODING_SYSTEM_SPEC always yields a vector for every element the
     caller may pass.  */
  list = Qnil;
  for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
    {
      elt = XCAR (tail);
      attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
      ASET (attrs, coding_attr_trans_tbl,
            get_translation_table (attrs, 1, NULL));
      list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
    }

  if (STRINGP (start))
    p = pbeg = SDATA (start);
  else
    p = pbeg = BYTE_POS_ADDR (start_byte);
  pend = p + (end_byte - start_byte);

  /* Trim the ASCII bytes at both ends of the text; only the leading
     ones advance POS.  */
  while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
  while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;

  while (p < pend)
    {
      if (ASCII_BYTE_P (*p))
        p++;
      else
        {
          c = STRING_CHAR_ADVANCE (p);

          /* Test C against every coding system, recording POS in the
             entries that cannot encode it.  */
          charset_map_loaded = 0;
          for (tail = list; CONSP (tail); tail = XCDR (tail))
            {
              elt = XCDR (XCAR (tail));
              if (! char_encodable_p (c, XCAR (elt)))
                XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
            }
          /* The flag was cleared above, so it is nonzero only if it
             was set during the checks; in that case recompute the
             pointers from their offsets (presumably the text may have
             been relocated while a charset map was loaded).  */
          if (charset_map_loaded)
            {
              EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;

              if (STRINGP (start))
                pbeg = SDATA (start);
              else
                pbeg = BYTE_POS_ADDR (start_byte);
              p = pbeg + p_offset;
              pend = pbeg + pend_offset;
            }
        }
      pos++;
    }

  /* Keep only the entries that collected at least one position, drop
     ATTRS from them, and put the positions back into ascending order.
     The work list is in reverse order of CODING-SYSTEM-LIST, so consing
     the result up again restores the caller's order.  */
  tail = list;
  list = Qnil;
  for (; CONSP (tail); tail = XCDR (tail))
    {
      elt = XCAR (tail);
      if (CONSP (XCDR (XCDR (elt))))
        list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
                      list);
    }

  return list;
}
8924
8925
8926 Lisp_Object
8927 code_convert_region (Lisp_Object start, Lisp_Object end,
8928                      Lisp_Object coding_system, Lisp_Object dst_object,
8929                      int encodep, int norecord)
8930 {
8931   struct coding_system coding;
8932   EMACS_INT from, from_byte, to, to_byte;
8933   Lisp_Object src_object;
8934
8935   CHECK_NUMBER_COERCE_MARKER (start);
8936   CHECK_NUMBER_COERCE_MARKER (end);
8937   if (NILP (coding_system))
8938     coding_system = Qno_conversion;
8939   else
8940     CHECK_CODING_SYSTEM (coding_system);
8941   src_object = Fcurrent_buffer ();
8942   if (NILP (dst_object))
8943     dst_object = src_object;
8944   else if (! EQ (dst_object, Qt))
8945     CHECK_BUFFER (dst_object);
8946
8947   validate_region (&start, &end);
8948   from = XFASTINT (start);
8949   from_byte = CHAR_TO_BYTE (from);
8950   to = XFASTINT (end);
8951   to_byte = CHAR_TO_BYTE (to);
8952
8953   setup_coding_system (coding_system, &coding);
8954   coding.mode |= CODING_MODE_LAST_BLOCK;
8955
8956   if (encodep)
8957     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8958                           dst_object);
8959   else
8960     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8961                           dst_object);
8962   if (! norecord)
8963     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8964
8965   return (BUFFERP (dst_object)
8966           ? make_number (coding.produced_char)
8967           : coding.dst_object);
8968 }
8969
8970
8971 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8972        3, 4, "r\nzCoding system: ",
8973        doc: /* Decode the current region from the specified coding system.
8974 When called from a program, takes four arguments:
8975         START, END, CODING-SYSTEM, and DESTINATION.
8976 START and END are buffer positions.
8977
8978 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8979 If nil, the region between START and END is replaced by the decoded text.
8980 If buffer, the decoded text is inserted in that buffer after point (point
8981 does not move).
8982 In those cases, the length of the decoded text is returned.
8983 If DESTINATION is t, the decoded text is returned.
8984
8985 This function sets `last-coding-system-used' to the precise coding system
8986 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8987 not fully specified.)  */)
8988   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
8989 {
8990   return code_convert_region (start, end, coding_system, destination, 0, 0);
8991 }
8992
8993 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8994        3, 4, "r\nzCoding system: ",
8995        doc: /* Encode the current region by specified coding system.
8996 When called from a program, takes four arguments:
8997         START, END, CODING-SYSTEM and DESTINATION.
8998 START and END are buffer positions.
8999
9000 Optional 4th arguments DESTINATION specifies where the encoded text goes.
9001 If nil, the region between START and END is replace by the encoded text.
9002 If buffer, the encoded text is inserted in that buffer after point (point
9003 does not move).
9004 In those cases, the length of the encoded text is returned.
9005 If DESTINATION is t, the encoded text is returned.
9006
9007 This function sets `last-coding-system-used' to the precise coding system
9008 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9009 not fully specified.)  */)
9010   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
9011 {
9012   return code_convert_region (start, end, coding_system, destination, 1, 0);
9013 }
9014
9015 Lisp_Object
9016 code_convert_string (Lisp_Object string, Lisp_Object coding_system,
9017                      Lisp_Object dst_object, int encodep, int nocopy, int norecord)
9018 {
9019   struct coding_system coding;
9020   EMACS_INT chars, bytes;
9021
9022   CHECK_STRING (string);
9023   if (NILP (coding_system))
9024     {
9025       if (! norecord)
9026         Vlast_coding_system_used = Qno_conversion;
9027       if (NILP (dst_object))
9028         return (nocopy ? Fcopy_sequence (string) : string);
9029     }
9030
9031   if (NILP (coding_system))
9032     coding_system = Qno_conversion;
9033   else
9034     CHECK_CODING_SYSTEM (coding_system);
9035   if (NILP (dst_object))
9036     dst_object = Qt;
9037   else if (! EQ (dst_object, Qt))
9038     CHECK_BUFFER (dst_object);
9039
9040   setup_coding_system (coding_system, &coding);
9041   coding.mode |= CODING_MODE_LAST_BLOCK;
9042   chars = SCHARS (string);
9043   bytes = SBYTES (string);
9044   if (encodep)
9045     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9046   else
9047     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9048   if (! norecord)
9049     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9050
9051   return (BUFFERP (dst_object)
9052           ? make_number (coding.produced_char)
9053           : coding.dst_object);
9054 }
9055
9056
9057 /* Encode or decode STRING according to CODING_SYSTEM.
9058    Do not set Vlast_coding_system_used.
9059
9060    This function is called only from macros DECODE_FILE and
9061    ENCODE_FILE, thus we ignore character composition.  */
9062
9063 Lisp_Object
9064 code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9065                               int encodep)
9066 {
9067   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9068 }
9069
9070
9071 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9072        2, 4, 0,
9073        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9074
9075 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9076 if the decoding operation is trivial.
9077
9078 Optional fourth arg BUFFER non-nil means that the decoded text is
9079 inserted in that buffer after point (point does not move).  In this
9080 case, the return value is the length of the decoded text.
9081
9082 This function sets `last-coding-system-used' to the precise coding system
9083 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9084 not fully specified.)  */)
9085   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9086 {
9087   return code_convert_string (string, coding_system, buffer,
9088                               0, ! NILP (nocopy), 0);
9089 }
9090
9091 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9092        2, 4, 0,
9093        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9094
9095 Optional third arg NOCOPY non-nil means it is OK to return STRING
9096 itself if the encoding operation is trivial.
9097
9098 Optional fourth arg BUFFER non-nil means that the encoded text is
9099 inserted in that buffer after point (point does not move).  In this
9100 case, the return value is the length of the encoded text.
9101
9102 This function sets `last-coding-system-used' to the precise coding system
9103 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9104 not fully specified.)  */)
9105   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9106 {
9107   return code_convert_string (string, coding_system, buffer,
9108                               1, ! NILP (nocopy), 1);
9109 }
9110
9111 \f
9112 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9113        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9114 Return the corresponding character.  */)
9115   (Lisp_Object code)
9116 {
9117   Lisp_Object spec, attrs, val;
9118   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9119   int c;
9120
9121   CHECK_NATNUM (code);
9122   c = XFASTINT (code);
9123   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9124   attrs = AREF (spec, 0);
9125
9126   if (ASCII_BYTE_P (c)
9127       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9128     return code;
9129
9130   val = CODING_ATTR_CHARSET_LIST (attrs);
9131   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9132   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9133   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9134
9135   if (c <= 0x7F)
9136     charset = charset_roman;
9137   else if (c >= 0xA0 && c < 0xDF)
9138     {
9139       charset = charset_kana;
9140       c -= 0x80;
9141     }
9142   else
9143     {
9144       int s1 = c >> 8, s2 = c & 0xFF;
9145
9146       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9147           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9148         error ("Invalid code: %d", code);
9149       SJIS_TO_JIS (c);
9150       charset = charset_kanji;
9151     }
9152   c = DECODE_CHAR (charset, c);
9153   if (c < 0)
9154     error ("Invalid code: %d", code);
9155   return make_number (c);
9156 }
9157
9158
9159 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9160        doc: /* Encode a Japanese character CH to shift_jis encoding.
9161 Return the corresponding code in SJIS.  */)
9162   (Lisp_Object ch)
9163 {
9164   Lisp_Object spec, attrs, charset_list;
9165   int c;
9166   struct charset *charset;
9167   unsigned code;
9168
9169   CHECK_CHARACTER (ch);
9170   c = XFASTINT (ch);
9171   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9172   attrs = AREF (spec, 0);
9173
9174   if (ASCII_CHAR_P (c)
9175       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9176     return ch;
9177
9178   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9179   charset = char_charset (c, charset_list, &code);
9180   if (code == CHARSET_INVALID_CODE (charset))
9181     error ("Can't encode by shift_jis encoding: %d", c);
9182   JIS_TO_SJIS (code);
9183
9184   return make_number (code);
9185 }
9186
9187 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9188        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9189 Return the corresponding character.  */)
9190   (Lisp_Object code)
9191 {
9192   Lisp_Object spec, attrs, val;
9193   struct charset *charset_roman, *charset_big5, *charset;
9194   int c;
9195
9196   CHECK_NATNUM (code);
9197   c = XFASTINT (code);
9198   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9199   attrs = AREF (spec, 0);
9200
9201   if (ASCII_BYTE_P (c)
9202       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9203     return code;
9204
9205   val = CODING_ATTR_CHARSET_LIST (attrs);
9206   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9207   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9208
9209   if (c <= 0x7F)
9210     charset = charset_roman;
9211   else
9212     {
9213       int b1 = c >> 8, b2 = c & 0x7F;
9214       if (b1 < 0xA1 || b1 > 0xFE
9215           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9216         error ("Invalid code: %d", code);
9217       charset = charset_big5;
9218     }
9219   c = DECODE_CHAR (charset, (unsigned )c);
9220   if (c < 0)
9221     error ("Invalid code: %d", code);
9222   return make_number (c);
9223 }
9224
9225 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9226        doc: /* Encode the Big5 character CH to BIG5 coding system.
9227 Return the corresponding character code in Big5.  */)
9228   (Lisp_Object ch)
9229 {
9230   Lisp_Object spec, attrs, charset_list;
9231   struct charset *charset;
9232   int c;
9233   unsigned code;
9234
9235   CHECK_CHARACTER (ch);
9236   c = XFASTINT (ch);
9237   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9238   attrs = AREF (spec, 0);
9239   if (ASCII_CHAR_P (c)
9240       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9241     return ch;
9242
9243   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9244   charset = char_charset (c, charset_list, &code);
9245   if (code == CHARSET_INVALID_CODE (charset))
9246     error ("Can't encode by Big5 encoding: %d", c);
9247
9248   return make_number (code);
9249 }
9250
9251 \f
9252 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9253        Sset_terminal_coding_system_internal, 1, 2, 0,
9254        doc: /* Internal use only.  */)
9255   (Lisp_Object coding_system, Lisp_Object terminal)
9256 {
9257   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9258   CHECK_SYMBOL (coding_system);
9259   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9260   /* We had better not send unsafe characters to terminal.  */
9261   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9262   /* Characer composition should be disabled.  */
9263   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9264   terminal_coding->src_multibyte = 1;
9265   terminal_coding->dst_multibyte = 0;
9266   return Qnil;
9267 }
9268
9269 DEFUN ("set-safe-terminal-coding-system-internal",
9270        Fset_safe_terminal_coding_system_internal,
9271        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9272        doc: /* Internal use only.  */)
9273   (Lisp_Object coding_system)
9274 {
9275   CHECK_SYMBOL (coding_system);
9276   setup_coding_system (Fcheck_coding_system (coding_system),
9277                        &safe_terminal_coding);
9278   /* Characer composition should be disabled.  */
9279   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9280   safe_terminal_coding.src_multibyte = 1;
9281   safe_terminal_coding.dst_multibyte = 0;
9282   return Qnil;
9283 }
9284
9285 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9286        Sterminal_coding_system, 0, 1, 0,
9287        doc: /* Return coding system specified for terminal output on the given terminal.
9288 TERMINAL may be a terminal object, a frame, or nil for the selected
9289 frame's terminal device.  */)
9290   (Lisp_Object terminal)
9291 {
9292   struct coding_system *terminal_coding
9293     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9294   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9295
9296   /* For backward compatibility, return nil if it is `undecided'. */
9297   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9298 }
9299
9300 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9301        Sset_keyboard_coding_system_internal, 1, 2, 0,
9302        doc: /* Internal use only.  */)
9303   (Lisp_Object coding_system, Lisp_Object terminal)
9304 {
9305   struct terminal *t = get_terminal (terminal, 1);
9306   CHECK_SYMBOL (coding_system);
9307   if (NILP (coding_system))
9308     coding_system = Qno_conversion;
9309   else
9310     Fcheck_coding_system (coding_system);
9311   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9312   /* Characer composition should be disabled.  */
9313   TERMINAL_KEYBOARD_CODING (t)->common_flags
9314     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9315   return Qnil;
9316 }
9317
9318 DEFUN ("keyboard-coding-system",
9319        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9320        doc: /* Return coding system specified for decoding keyboard input.  */)
9321   (Lisp_Object terminal)
9322 {
9323   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9324                          (get_terminal (terminal, 1))->id);
9325 }
9326
9327 \f
9328 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9329        Sfind_operation_coding_system,  1, MANY, 0,
9330        doc: /* Choose a coding system for an operation based on the target name.
9331 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9332 DECODING-SYSTEM is the coding system to use for decoding
9333 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9334 for encoding (in case OPERATION does encoding).
9335
9336 The first argument OPERATION specifies an I/O primitive:
9337   For file I/O, `insert-file-contents' or `write-region'.
9338   For process I/O, `call-process', `call-process-region', or `start-process'.
9339   For network I/O, `open-network-stream'.
9340
9341 The remaining arguments should be the same arguments that were passed
9342 to the primitive.  Depending on which primitive, one of those arguments
9343 is selected as the TARGET.  For example, if OPERATION does file I/O,
9344 whichever argument specifies the file name is TARGET.
9345
9346 TARGET has a meaning which depends on OPERATION:
9347   For file I/O, TARGET is a file name (except for the special case below).
9348   For process I/O, TARGET is a process name.
9349   For network I/O, TARGET is a service name or a port number.
9350
9351 This function looks up what is specified for TARGET in
9352 `file-coding-system-alist', `process-coding-system-alist',
9353 or `network-coding-system-alist' depending on OPERATION.
9354 They may specify a coding system, a cons of coding systems,
9355 or a function symbol to call.
9356 In the last case, we call the function with one argument,
9357 which is a list of all the arguments given to this function.
9358 If the function can't decide a coding system, it can return
9359 `undecided' so that the normal code-detection is performed.
9360
9361 If OPERATION is `insert-file-contents', the argument corresponding to
9362 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9363 file name to look up, and BUFFER is a buffer that contains the file's
9364 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9365 function to call for FILENAME, that function should examine the
9366 contents of BUFFER instead of reading the file.
9367
9368 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9369   (int nargs, Lisp_Object *args)
9370 {
9371   Lisp_Object operation, target_idx, target, val;
9372   register Lisp_Object chain;
9373
9374   if (nargs < 2)
9375     error ("Too few arguments");
9376   operation = args[0];
9377   if (!SYMBOLP (operation)
9378       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9379     error ("Invalid first argument");
9380   if (nargs < 1 + XINT (target_idx))
9381     error ("Too few arguments for operation: %s",
9382            SDATA (SYMBOL_NAME (operation)));
9383   target = args[XINT (target_idx) + 1];
9384   if (!(STRINGP (target)
9385         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9386             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9387         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9388     error ("Invalid %dth argument", XINT (target_idx) + 1);
9389   if (CONSP (target))
9390     target = XCAR (target);
9391
9392   chain = ((EQ (operation, Qinsert_file_contents)
9393             || EQ (operation, Qwrite_region))
9394            ? Vfile_coding_system_alist
9395            : (EQ (operation, Qopen_network_stream)
9396               ? Vnetwork_coding_system_alist
9397               : Vprocess_coding_system_alist));
9398   if (NILP (chain))
9399     return Qnil;
9400
9401   for (; CONSP (chain); chain = XCDR (chain))
9402     {
9403       Lisp_Object elt;
9404
9405       elt = XCAR (chain);
9406       if (CONSP (elt)
9407           && ((STRINGP (target)
9408                && STRINGP (XCAR (elt))
9409                && fast_string_match (XCAR (elt), target) >= 0)
9410               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9411         {
9412           val = XCDR (elt);
9413           /* Here, if VAL is both a valid coding system and a valid
9414              function symbol, we return VAL as a coding system.  */
9415           if (CONSP (val))
9416             return val;
9417           if (! SYMBOLP (val))
9418             return Qnil;
9419           if (! NILP (Fcoding_system_p (val)))
9420             return Fcons (val, val);
9421           if (! NILP (Ffboundp (val)))
9422             {
9423               /* We use call1 rather than safe_call1
9424                  so as to get bug reports about functions called here
9425                  which don't handle the current interface.  */
9426               val = call1 (val, Flist (nargs, args));
9427               if (CONSP (val))
9428                 return val;
9429               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9430                 return Fcons (val, val);
9431             }
9432           return Qnil;
9433         }
9434     }
9435   return Qnil;
9436 }
9437
9438 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9439        Sset_coding_system_priority, 0, MANY, 0,
9440        doc: /* Assign higher priority to the coding systems given as arguments.
9441 If multiple coding systems belong to the same category,
9442 all but the first one are ignored.
9443
9444 usage: (set-coding-system-priority &rest coding-systems)  */)
9445   (int nargs, Lisp_Object *args)
9446 {
9447   int i, j;
9448   int changed[coding_category_max];
9449   enum coding_category priorities[coding_category_max];
9450
9451   memset (changed, 0, sizeof changed);
9452
9453   for (i = j = 0; i < nargs; i++)
9454     {
9455       enum coding_category category;
9456       Lisp_Object spec, attrs;
9457
9458       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9459       attrs = AREF (spec, 0);
9460       category = XINT (CODING_ATTR_CATEGORY (attrs));
9461       if (changed[category])
9462         /* Ignore this coding system because a coding system of the
9463            same category already had a higher priority.  */
9464         continue;
9465       changed[category] = 1;
9466       priorities[j++] = category;
9467       if (coding_categories[category].id >= 0
9468           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9469         setup_coding_system (args[i], &coding_categories[category]);
9470       Fset (AREF (Vcoding_category_table, category), args[i]);
9471     }
9472
9473   /* Now we have decided top J priorities.  Reflect the order of the
9474      original priorities to the remaining priorities.  */
9475
9476   for (i = j, j = 0; i < coding_category_max; i++, j++)
9477     {
9478       while (j < coding_category_max
9479              && changed[coding_priorities[j]])
9480         j++;
9481       if (j == coding_category_max)
9482         abort ();
9483       priorities[i] = coding_priorities[j];
9484     }
9485
9486   memcpy (coding_priorities, priorities, sizeof priorities);
9487
9488   /* Update `coding-category-list'.  */
9489   Vcoding_category_list = Qnil;
9490   for (i = coding_category_max - 1; i >= 0; i--)
9491     Vcoding_category_list
9492       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9493                Vcoding_category_list);
9494
9495   return Qnil;
9496 }
9497
9498 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9499        Scoding_system_priority_list, 0, 1, 0,
9500        doc: /* Return a list of coding systems ordered by their priorities.
9501 The list contains a subset of coding systems; i.e. coding systems
9502 assigned to each coding category (see `coding-category-list').
9503
9504 HIGHESTP non-nil means just return the highest priority one.  */)
9505   (Lisp_Object highestp)
9506 {
9507   int i;
9508   Lisp_Object val;
9509
9510   for (i = 0, val = Qnil; i < coding_category_max; i++)
9511     {
9512       enum coding_category category = coding_priorities[i];
9513       int id = coding_categories[category].id;
9514       Lisp_Object attrs;
9515
9516       if (id < 0)
9517         continue;
9518       attrs = CODING_ID_ATTRS (id);
9519       if (! NILP (highestp))
9520         return CODING_ATTR_BASE_NAME (attrs);
9521       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9522     }
9523   return Fnreverse (val);
9524 }
9525
9526 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9527
9528 static Lisp_Object
9529 make_subsidiaries (Lisp_Object base)
9530 {
9531   Lisp_Object subsidiaries;
9532   int base_name_len = SBYTES (SYMBOL_NAME (base));
9533   char *buf = (char *) alloca (base_name_len + 6);
9534   int i;
9535
9536   memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
9537   subsidiaries = Fmake_vector (make_number (3), Qnil);
9538   for (i = 0; i < 3; i++)
9539     {
9540       memcpy (buf + base_name_len, suffixes[i], strlen (suffixes[i]) + 1);
9541       ASET (subsidiaries, i, intern (buf));
9542     }
9543   return subsidiaries;
9544 }
9545
9546
9547 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9548        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9549        doc: /* For internal use only.
9550 usage: (define-coding-system-internal ...)  */)
9551   (int nargs, Lisp_Object *args)
9552 {
9553   Lisp_Object name;
9554   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9555   Lisp_Object attrs;            /* Vector of attributes.  */
9556   Lisp_Object eol_type;
9557   Lisp_Object aliases;
9558   Lisp_Object coding_type, charset_list, safe_charsets;
9559   enum coding_category category;
9560   Lisp_Object tail, val;
9561   int max_charset_id = 0;
9562   int i;
9563
9564   if (nargs < coding_arg_max)
9565     goto short_args;
9566
9567   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9568
9569   name = args[coding_arg_name];
9570   CHECK_SYMBOL (name);
9571   CODING_ATTR_BASE_NAME (attrs) = name;
9572
9573   val = args[coding_arg_mnemonic];
9574   if (! STRINGP (val))
9575     CHECK_CHARACTER (val);
9576   CODING_ATTR_MNEMONIC (attrs) = val;
9577
9578   coding_type = args[coding_arg_coding_type];
9579   CHECK_SYMBOL (coding_type);
9580   CODING_ATTR_TYPE (attrs) = coding_type;
9581
9582   charset_list = args[coding_arg_charset_list];
9583   if (SYMBOLP (charset_list))
9584     {
9585       if (EQ (charset_list, Qiso_2022))
9586         {
9587           if (! EQ (coding_type, Qiso_2022))
9588             error ("Invalid charset-list");
9589           charset_list = Viso_2022_charset_list;
9590         }
9591       else if (EQ (charset_list, Qemacs_mule))
9592         {
9593           if (! EQ (coding_type, Qemacs_mule))
9594             error ("Invalid charset-list");
9595           charset_list = Vemacs_mule_charset_list;
9596         }
9597       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9598         if (max_charset_id < XFASTINT (XCAR (tail)))
9599           max_charset_id = XFASTINT (XCAR (tail));
9600     }
9601   else
9602     {
9603       charset_list = Fcopy_sequence (charset_list);
9604       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9605         {
9606           struct charset *charset;
9607
9608           val = XCAR (tail);
9609           CHECK_CHARSET_GET_CHARSET (val, charset);
9610           if (EQ (coding_type, Qiso_2022)
9611               ? CHARSET_ISO_FINAL (charset) < 0
9612               : EQ (coding_type, Qemacs_mule)
9613               ? CHARSET_EMACS_MULE_ID (charset) < 0
9614               : 0)
9615             error ("Can't handle charset `%s'",
9616                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9617
9618           XSETCAR (tail, make_number (charset->id));
9619           if (max_charset_id < charset->id)
9620             max_charset_id = charset->id;
9621         }
9622     }
9623   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9624
9625   safe_charsets = make_uninit_string (max_charset_id + 1);
9626   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9627   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9628     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9629   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9630
9631   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9632
9633   val = args[coding_arg_decode_translation_table];
9634   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9635     CHECK_SYMBOL (val);
9636   CODING_ATTR_DECODE_TBL (attrs) = val;
9637
9638   val = args[coding_arg_encode_translation_table];
9639   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9640     CHECK_SYMBOL (val);
9641   CODING_ATTR_ENCODE_TBL (attrs) = val;
9642
9643   val = args[coding_arg_post_read_conversion];
9644   CHECK_SYMBOL (val);
9645   CODING_ATTR_POST_READ (attrs) = val;
9646
9647   val = args[coding_arg_pre_write_conversion];
9648   CHECK_SYMBOL (val);
9649   CODING_ATTR_PRE_WRITE (attrs) = val;
9650
9651   val = args[coding_arg_default_char];
9652   if (NILP (val))
9653     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9654   else
9655     {
9656       CHECK_CHARACTER (val);
9657       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9658     }
9659
9660   val = args[coding_arg_for_unibyte];
9661   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9662
9663   val = args[coding_arg_plist];
9664   CHECK_LIST (val);
9665   CODING_ATTR_PLIST (attrs) = val;
9666
9667   if (EQ (coding_type, Qcharset))
9668     {
9669       /* Generate a lisp vector of 256 elements.  Each element is nil,
9670          integer, or a list of charset IDs.
9671
9672          If Nth element is nil, the byte code N is invalid in this
9673          coding system.
9674
9675          If Nth element is a number NUM, N is the first byte of a
9676          charset whose ID is NUM.
9677
9678          If Nth element is a list of charset IDs, N is the first byte
9679          of one of them.  The list is sorted by dimensions of the
9680          charsets.  A charset of smaller dimension comes first. */
9681       val = Fmake_vector (make_number (256), Qnil);
9682
9683       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9684         {
9685           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9686           int dim = CHARSET_DIMENSION (charset);
9687           int idx = (dim - 1) * 4;
9688
9689           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9690             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9691
9692           for (i = charset->code_space[idx];
9693                i <= charset->code_space[idx + 1]; i++)
9694             {
9695               Lisp_Object tmp, tmp2;
9696               int dim2;
9697
9698               tmp = AREF (val, i);
9699               if (NILP (tmp))
9700                 tmp = XCAR (tail);
9701               else if (NUMBERP (tmp))
9702                 {
9703                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9704                   if (dim < dim2)
9705                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9706                   else
9707                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9708                 }
9709               else
9710                 {
9711                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9712                     {
9713                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9714                       if (dim < dim2)
9715                         break;
9716                     }
9717                   if (NILP (tmp2))
9718                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9719                   else
9720                     {
9721                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9722                       XSETCAR (tmp2, XCAR (tail));
9723                     }
9724                 }
9725               ASET (val, i, tmp);
9726             }
9727         }
9728       ASET (attrs, coding_attr_charset_valids, val);
9729       category = coding_category_charset;
9730     }
9731   else if (EQ (coding_type, Qccl))
9732     {
9733       Lisp_Object valids;
9734
9735       if (nargs < coding_arg_ccl_max)
9736         goto short_args;
9737
9738       val = args[coding_arg_ccl_decoder];
9739       CHECK_CCL_PROGRAM (val);
9740       if (VECTORP (val))
9741         val = Fcopy_sequence (val);
9742       ASET (attrs, coding_attr_ccl_decoder, val);
9743
9744       val = args[coding_arg_ccl_encoder];
9745       CHECK_CCL_PROGRAM (val);
9746       if (VECTORP (val))
9747         val = Fcopy_sequence (val);
9748       ASET (attrs, coding_attr_ccl_encoder, val);
9749
9750       val = args[coding_arg_ccl_valids];
9751       valids = Fmake_string (make_number (256), make_number (0));
9752       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9753         {
9754           int from, to;
9755
9756           val = Fcar (tail);
9757           if (INTEGERP (val))
9758             {
9759               from = to = XINT (val);
9760               if (from < 0 || from > 255)
9761                 args_out_of_range_3 (val, make_number (0), make_number (255));
9762             }
9763           else
9764             {
9765               CHECK_CONS (val);
9766               CHECK_NATNUM_CAR (val);
9767               CHECK_NATNUM_CDR (val);
9768               from = XINT (XCAR (val));
9769               if (from > 255)
9770                 args_out_of_range_3 (XCAR (val),
9771                                      make_number (0), make_number (255));
9772               to = XINT (XCDR (val));
9773               if (to < from || to > 255)
9774                 args_out_of_range_3 (XCDR (val),
9775                                      XCAR (val), make_number (255));
9776             }
9777           for (i = from; i <= to; i++)
9778             SSET (valids, i, 1);
9779         }
9780       ASET (attrs, coding_attr_ccl_valids, valids);
9781
9782       category = coding_category_ccl;
9783     }
9784   else if (EQ (coding_type, Qutf_16))
9785     {
9786       Lisp_Object bom, endian;
9787
9788       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9789
9790       if (nargs < coding_arg_utf16_max)
9791         goto short_args;
9792
9793       bom = args[coding_arg_utf16_bom];
9794       if (! NILP (bom) && ! EQ (bom, Qt))
9795         {
9796           CHECK_CONS (bom);
9797           val = XCAR (bom);
9798           CHECK_CODING_SYSTEM (val);
9799           val = XCDR (bom);
9800           CHECK_CODING_SYSTEM (val);
9801         }
9802       ASET (attrs, coding_attr_utf_bom, bom);
9803
9804       endian = args[coding_arg_utf16_endian];
9805       CHECK_SYMBOL (endian);
9806       if (NILP (endian))
9807         endian = Qbig;
9808       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9809         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9810       ASET (attrs, coding_attr_utf_16_endian, endian);
9811
9812       category = (CONSP (bom)
9813                   ? coding_category_utf_16_auto
9814                   : NILP (bom)
9815                   ? (EQ (endian, Qbig)
9816                      ? coding_category_utf_16_be_nosig
9817                      : coding_category_utf_16_le_nosig)
9818                   : (EQ (endian, Qbig)
9819                      ? coding_category_utf_16_be
9820                      : coding_category_utf_16_le));
9821     }
9822   else if (EQ (coding_type, Qiso_2022))
9823     {
9824       Lisp_Object initial, reg_usage, request, flags;
9825       int i;
9826
9827       if (nargs < coding_arg_iso2022_max)
9828         goto short_args;
9829
9830       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9831       CHECK_VECTOR (initial);
9832       for (i = 0; i < 4; i++)
9833         {
9834           val = Faref (initial, make_number (i));
9835           if (! NILP (val))
9836             {
9837               struct charset *charset;
9838
9839               CHECK_CHARSET_GET_CHARSET (val, charset);
9840               ASET (initial, i, make_number (CHARSET_ID (charset)));
9841               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9842                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9843             }
9844           else
9845             ASET (initial, i, make_number (-1));
9846         }
9847
9848       reg_usage = args[coding_arg_iso2022_reg_usage];
9849       CHECK_CONS (reg_usage);
9850       CHECK_NUMBER_CAR (reg_usage);
9851       CHECK_NUMBER_CDR (reg_usage);
9852
9853       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9854       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9855         {
9856           int id;
9857           Lisp_Object tmp;
9858
9859           val = Fcar (tail);
9860           CHECK_CONS (val);
9861           tmp = XCAR (val);
9862           CHECK_CHARSET_GET_ID (tmp, id);
9863           CHECK_NATNUM_CDR (val);
9864           if (XINT (XCDR (val)) >= 4)
9865             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9866           XSETCAR (val, make_number (id));
9867         }
9868
9869       flags = args[coding_arg_iso2022_flags];
9870       CHECK_NATNUM (flags);
9871       i = XINT (flags);
9872       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9873         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9874
9875       ASET (attrs, coding_attr_iso_initial, initial);
9876       ASET (attrs, coding_attr_iso_usage, reg_usage);
9877       ASET (attrs, coding_attr_iso_request, request);
9878       ASET (attrs, coding_attr_iso_flags, flags);
9879       setup_iso_safe_charsets (attrs);
9880
9881       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9882         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9883                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9884                     ? coding_category_iso_7_else
9885                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9886                     ? coding_category_iso_7
9887                     : coding_category_iso_7_tight);
9888       else
9889         {
9890           int id = XINT (AREF (initial, 1));
9891
9892           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9893                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9894                        || id < 0)
9895                       ? coding_category_iso_8_else
9896                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9897                       ? coding_category_iso_8_1
9898                       : coding_category_iso_8_2);
9899         }
9900       if (category != coding_category_iso_8_1
9901           && category != coding_category_iso_8_2)
9902         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9903     }
9904   else if (EQ (coding_type, Qemacs_mule))
9905     {
9906       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9907         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9908       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9909       category = coding_category_emacs_mule;
9910     }
9911   else if (EQ (coding_type, Qshift_jis))
9912     {
9913
9914       struct charset *charset;
9915
9916       if (XINT (Flength (charset_list)) != 3
9917           && XINT (Flength (charset_list)) != 4)
9918         error ("There should be three or four charsets");
9919
9920       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9921       if (CHARSET_DIMENSION (charset) != 1)
9922         error ("Dimension of charset %s is not one",
9923                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9924       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9925         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9926
9927       charset_list = XCDR (charset_list);
9928       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9929       if (CHARSET_DIMENSION (charset) != 1)
9930         error ("Dimension of charset %s is not one",
9931                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9932
9933       charset_list = XCDR (charset_list);
9934       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9935       if (CHARSET_DIMENSION (charset) != 2)
9936         error ("Dimension of charset %s is not two",
9937                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9938
9939       charset_list = XCDR (charset_list);
9940       if (! NILP (charset_list))
9941         {
9942           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9943           if (CHARSET_DIMENSION (charset) != 2)
9944             error ("Dimension of charset %s is not two",
9945                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9946         }
9947
9948       category = coding_category_sjis;
9949       Vsjis_coding_system = name;
9950     }
9951   else if (EQ (coding_type, Qbig5))
9952     {
9953       struct charset *charset;
9954
9955       if (XINT (Flength (charset_list)) != 2)
9956         error ("There should be just two charsets");
9957
9958       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9959       if (CHARSET_DIMENSION (charset) != 1)
9960         error ("Dimension of charset %s is not one",
9961                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9962       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9963         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9964
9965       charset_list = XCDR (charset_list);
9966       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9967       if (CHARSET_DIMENSION (charset) != 2)
9968         error ("Dimension of charset %s is not two",
9969                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9970
9971       category = coding_category_big5;
9972       Vbig5_coding_system = name;
9973     }
9974   else if (EQ (coding_type, Qraw_text))
9975     {
9976       category = coding_category_raw_text;
9977       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9978     }
9979   else if (EQ (coding_type, Qutf_8))
9980     {
9981       Lisp_Object bom;
9982
9983       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9984
9985       if (nargs < coding_arg_utf8_max)
9986         goto short_args;
9987
9988       bom = args[coding_arg_utf8_bom];
9989       if (! NILP (bom) && ! EQ (bom, Qt))
9990         {
9991           CHECK_CONS (bom);
9992           val = XCAR (bom);
9993           CHECK_CODING_SYSTEM (val);
9994           val = XCDR (bom);
9995           CHECK_CODING_SYSTEM (val);
9996         }
9997       ASET (attrs, coding_attr_utf_bom, bom);
9998
9999       category = (CONSP (bom) ? coding_category_utf_8_auto
10000                   : NILP (bom) ? coding_category_utf_8_nosig
10001                   : coding_category_utf_8_sig);
10002     }
10003   else if (EQ (coding_type, Qundecided))
10004     category = coding_category_undecided;
10005   else
10006     error ("Invalid coding system type: %s",
10007            SDATA (SYMBOL_NAME (coding_type)));
10008
10009   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10010   CODING_ATTR_PLIST (attrs)
10011     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10012                                 CODING_ATTR_PLIST (attrs)));
10013   CODING_ATTR_PLIST (attrs)
10014     = Fcons (QCascii_compatible_p,
10015              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10016                     CODING_ATTR_PLIST (attrs)));
10017
10018   eol_type = args[coding_arg_eol_type];
10019   if (! NILP (eol_type)
10020       && ! EQ (eol_type, Qunix)
10021       && ! EQ (eol_type, Qdos)
10022       && ! EQ (eol_type, Qmac))
10023     error ("Invalid eol-type");
10024
10025   aliases = Fcons (name, Qnil);
10026
10027   if (NILP (eol_type))
10028     {
10029       eol_type = make_subsidiaries (name);
10030       for (i = 0; i < 3; i++)
10031         {
10032           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10033
10034           this_name = AREF (eol_type, i);
10035           this_aliases = Fcons (this_name, Qnil);
10036           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10037           this_spec = Fmake_vector (make_number (3), attrs);
10038           ASET (this_spec, 1, this_aliases);
10039           ASET (this_spec, 2, this_eol_type);
10040           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10041           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10042           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10043           if (NILP (val))
10044             Vcoding_system_alist
10045               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10046                        Vcoding_system_alist);
10047         }
10048     }
10049
10050   spec_vec = Fmake_vector (make_number (3), attrs);
10051   ASET (spec_vec, 1, aliases);
10052   ASET (spec_vec, 2, eol_type);
10053
10054   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10055   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10056   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10057   if (NILP (val))
10058     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10059                                   Vcoding_system_alist);
10060
10061   {
10062     int id = coding_categories[category].id;
10063
10064     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10065       setup_coding_system (name, &coding_categories[category]);
10066   }
10067
10068   return Qnil;
10069
10070  short_args:
10071   return Fsignal (Qwrong_number_of_arguments,
10072                   Fcons (intern ("define-coding-system-internal"),
10073                          make_number (nargs)));
10074 }
10075
10076
10077 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10078        3, 3, 0,
10079        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10080   (Lisp_Object coding_system, Lisp_Object prop, Lisp_Object val)
10081 {
10082   Lisp_Object spec, attrs;
10083
10084   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10085   attrs = AREF (spec, 0);
10086   if (EQ (prop, QCmnemonic))
10087     {
10088       if (! STRINGP (val))
10089         CHECK_CHARACTER (val);
10090       CODING_ATTR_MNEMONIC (attrs) = val;
10091     }
10092   else if (EQ (prop, QCdefault_char))
10093     {
10094       if (NILP (val))
10095         val = make_number (' ');
10096       else
10097         CHECK_CHARACTER (val);
10098       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10099     }
10100   else if (EQ (prop, QCdecode_translation_table))
10101     {
10102       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10103         CHECK_SYMBOL (val);
10104       CODING_ATTR_DECODE_TBL (attrs) = val;
10105     }
10106   else if (EQ (prop, QCencode_translation_table))
10107     {
10108       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10109         CHECK_SYMBOL (val);
10110       CODING_ATTR_ENCODE_TBL (attrs) = val;
10111     }
10112   else if (EQ (prop, QCpost_read_conversion))
10113     {
10114       CHECK_SYMBOL (val);
10115       CODING_ATTR_POST_READ (attrs) = val;
10116     }
10117   else if (EQ (prop, QCpre_write_conversion))
10118     {
10119       CHECK_SYMBOL (val);
10120       CODING_ATTR_PRE_WRITE (attrs) = val;
10121     }
10122   else if (EQ (prop, QCascii_compatible_p))
10123     {
10124       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10125     }
10126
10127   CODING_ATTR_PLIST (attrs)
10128     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10129   return val;
10130 }
10131
10132
10133 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10134        Sdefine_coding_system_alias, 2, 2, 0,
10135        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10136   (Lisp_Object alias, Lisp_Object coding_system)
10137 {
10138   Lisp_Object spec, aliases, eol_type, val;
10139
10140   CHECK_SYMBOL (alias);
10141   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10142   aliases = AREF (spec, 1);
10143   /* ALIASES should be a list of length more than zero, and the first
10144      element is a base coding system.  Append ALIAS at the tail of the
10145      list.  */
10146   while (!NILP (XCDR (aliases)))
10147     aliases = XCDR (aliases);
10148   XSETCDR (aliases, Fcons (alias, Qnil));
10149
10150   eol_type = AREF (spec, 2);
10151   if (VECTORP (eol_type))
10152     {
10153       Lisp_Object subsidiaries;
10154       int i;
10155
10156       subsidiaries = make_subsidiaries (alias);
10157       for (i = 0; i < 3; i++)
10158         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10159                                      AREF (eol_type, i));
10160     }
10161
10162   Fputhash (alias, spec, Vcoding_system_hash_table);
10163   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10164   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10165   if (NILP (val))
10166     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10167                                   Vcoding_system_alist);
10168
10169   return Qnil;
10170 }
10171
10172 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10173        1, 1, 0,
10174        doc: /* Return the base of CODING-SYSTEM.
10175 Any alias or subsidiary coding system is not a base coding system.  */)
10176   (Lisp_Object coding_system)
10177 {
10178   Lisp_Object spec, attrs;
10179
10180   if (NILP (coding_system))
10181     return (Qno_conversion);
10182   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10183   attrs = AREF (spec, 0);
10184   return CODING_ATTR_BASE_NAME (attrs);
10185 }
10186
10187 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10188        1, 1, 0,
10189        doc: "Return the property list of CODING-SYSTEM.")
10190   (Lisp_Object coding_system)
10191 {
10192   Lisp_Object spec, attrs;
10193
10194   if (NILP (coding_system))
10195     coding_system = Qno_conversion;
10196   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10197   attrs = AREF (spec, 0);
10198   return CODING_ATTR_PLIST (attrs);
10199 }
10200
10201
10202 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10203        1, 1, 0,
10204        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10205   (Lisp_Object coding_system)
10206 {
10207   Lisp_Object spec;
10208
10209   if (NILP (coding_system))
10210     coding_system = Qno_conversion;
10211   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10212   return AREF (spec, 1);
10213 }
10214
10215 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10216        Scoding_system_eol_type, 1, 1, 0,
10217        doc: /* Return eol-type of CODING-SYSTEM.
10218 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10219
10220 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10221 and CR respectively.
10222
10223 A vector value indicates that a format of end-of-line should be
10224 detected automatically.  Nth element of the vector is the subsidiary
10225 coding system whose eol-type is N.  */)
10226   (Lisp_Object coding_system)
10227 {
10228   Lisp_Object spec, eol_type;
10229   int n;
10230
10231   if (NILP (coding_system))
10232     coding_system = Qno_conversion;
10233   if (! CODING_SYSTEM_P (coding_system))
10234     return Qnil;
10235   spec = CODING_SYSTEM_SPEC (coding_system);
10236   eol_type = AREF (spec, 2);
10237   if (VECTORP (eol_type))
10238     return Fcopy_sequence (eol_type);
10239   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10240   return make_number (n);
10241 }
10242
10243 #endif /* emacs */
10244
10245 \f
10246 /*** 12. Postamble ***/
10247
10248 void
10249 init_coding_once (void)
10250 {
10251   int i;
10252
10253   for (i = 0; i < coding_category_max; i++)
10254     {
10255       coding_categories[i].id = -1;
10256       coding_priorities[i] = i;
10257     }
10258
10259   /* ISO2022 specific initialize routine.  */
10260   for (i = 0; i < 0x20; i++)
10261     iso_code_class[i] = ISO_control_0;
10262   for (i = 0x21; i < 0x7F; i++)
10263     iso_code_class[i] = ISO_graphic_plane_0;
10264   for (i = 0x80; i < 0xA0; i++)
10265     iso_code_class[i] = ISO_control_1;
10266   for (i = 0xA1; i < 0xFF; i++)
10267     iso_code_class[i] = ISO_graphic_plane_1;
10268   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10269   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10270   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10271   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10272   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10273   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10274   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10275   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10276   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10277
10278   for (i = 0; i < 256; i++)
10279     {
10280       emacs_mule_bytes[i] = 1;
10281     }
10282   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10283   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10284   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10285   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10286 }
10287
10288 #ifdef emacs
10289
10290 void
10291 syms_of_coding (void)
10292 {
10293   staticpro (&Vcoding_system_hash_table);
10294   {
10295     Lisp_Object args[2];
10296     args[0] = QCtest;
10297     args[1] = Qeq;
10298     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10299   }
10300
10301   staticpro (&Vsjis_coding_system);
10302   Vsjis_coding_system = Qnil;
10303
10304   staticpro (&Vbig5_coding_system);
10305   Vbig5_coding_system = Qnil;
10306
10307   staticpro (&Vcode_conversion_reused_workbuf);
10308   Vcode_conversion_reused_workbuf = Qnil;
10309
10310   staticpro (&Vcode_conversion_workbuf_name);
10311   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10312
10313   reused_workbuf_in_use = 0;
10314
10315   DEFSYM (Qcharset, "charset");
10316   DEFSYM (Qtarget_idx, "target-idx");
10317   DEFSYM (Qcoding_system_history, "coding-system-history");
10318   Fset (Qcoding_system_history, Qnil);
10319
10320   /* Target FILENAME is the first argument.  */
10321   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10322   /* Target FILENAME is the third argument.  */
10323   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10324
10325   DEFSYM (Qcall_process, "call-process");
10326   /* Target PROGRAM is the first argument.  */
10327   Fput (Qcall_process, Qtarget_idx, make_number (0));
10328
10329   DEFSYM (Qcall_process_region, "call-process-region");
10330   /* Target PROGRAM is the third argument.  */
10331   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10332
10333   DEFSYM (Qstart_process, "start-process");
10334   /* Target PROGRAM is the third argument.  */
10335   Fput (Qstart_process, Qtarget_idx, make_number (2));
10336
10337   DEFSYM (Qopen_network_stream, "open-network-stream");
10338   /* Target SERVICE is the fourth argument.  */
10339   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10340
10341   DEFSYM (Qcoding_system, "coding-system");
10342   DEFSYM (Qcoding_aliases, "coding-aliases");
10343
10344   DEFSYM (Qeol_type, "eol-type");
10345   DEFSYM (Qunix, "unix");
10346   DEFSYM (Qdos, "dos");
10347
10348   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10349   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10350   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10351   DEFSYM (Qdefault_char, "default-char");
10352   DEFSYM (Qundecided, "undecided");
10353   DEFSYM (Qno_conversion, "no-conversion");
10354   DEFSYM (Qraw_text, "raw-text");
10355
10356   DEFSYM (Qiso_2022, "iso-2022");
10357
10358   DEFSYM (Qutf_8, "utf-8");
10359   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10360
10361   DEFSYM (Qutf_16, "utf-16");
10362   DEFSYM (Qbig, "big");
10363   DEFSYM (Qlittle, "little");
10364
10365   DEFSYM (Qshift_jis, "shift-jis");
10366   DEFSYM (Qbig5, "big5");
10367
10368   DEFSYM (Qcoding_system_p, "coding-system-p");
10369
10370   DEFSYM (Qcoding_system_error, "coding-system-error");
10371   Fput (Qcoding_system_error, Qerror_conditions,
10372         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10373   Fput (Qcoding_system_error, Qerror_message,
10374         make_pure_c_string ("Invalid coding system"));
10375
10376   /* Intern this now in case it isn't already done.
10377      Setting this variable twice is harmless.
10378      But don't staticpro it here--that is done in alloc.c.  */
10379   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10380
10381   DEFSYM (Qtranslation_table, "translation-table");
10382   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10383   DEFSYM (Qtranslation_table_id, "translation-table-id");
10384   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10385   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10386
10387   DEFSYM (Qvalid_codes, "valid-codes");
10388
10389   DEFSYM (Qemacs_mule, "emacs-mule");
10390
10391   DEFSYM (QCcategory, ":category");
10392   DEFSYM (QCmnemonic, ":mnemonic");
10393   DEFSYM (QCdefault_char, ":default-char");
10394   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10395   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10396   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10397   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10398   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10399
10400   Vcoding_category_table
10401     = Fmake_vector (make_number (coding_category_max), Qnil);
10402   staticpro (&Vcoding_category_table);
10403   /* Followings are target of code detection.  */
10404   ASET (Vcoding_category_table, coding_category_iso_7,
10405         intern_c_string ("coding-category-iso-7"));
10406   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10407         intern_c_string ("coding-category-iso-7-tight"));
10408   ASET (Vcoding_category_table, coding_category_iso_8_1,
10409         intern_c_string ("coding-category-iso-8-1"));
10410   ASET (Vcoding_category_table, coding_category_iso_8_2,
10411         intern_c_string ("coding-category-iso-8-2"));
10412   ASET (Vcoding_category_table, coding_category_iso_7_else,
10413         intern_c_string ("coding-category-iso-7-else"));
10414   ASET (Vcoding_category_table, coding_category_iso_8_else,
10415         intern_c_string ("coding-category-iso-8-else"));
10416   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10417         intern_c_string ("coding-category-utf-8-auto"));
10418   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10419         intern_c_string ("coding-category-utf-8"));
10420   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10421         intern_c_string ("coding-category-utf-8-sig"));
10422   ASET (Vcoding_category_table, coding_category_utf_16_be,
10423         intern_c_string ("coding-category-utf-16-be"));
10424   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10425         intern_c_string ("coding-category-utf-16-auto"));
10426   ASET (Vcoding_category_table, coding_category_utf_16_le,
10427         intern_c_string ("coding-category-utf-16-le"));
10428   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10429         intern_c_string ("coding-category-utf-16-be-nosig"));
10430   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10431         intern_c_string ("coding-category-utf-16-le-nosig"));
10432   ASET (Vcoding_category_table, coding_category_charset,
10433         intern_c_string ("coding-category-charset"));
10434   ASET (Vcoding_category_table, coding_category_sjis,
10435         intern_c_string ("coding-category-sjis"));
10436   ASET (Vcoding_category_table, coding_category_big5,
10437         intern_c_string ("coding-category-big5"));
10438   ASET (Vcoding_category_table, coding_category_ccl,
10439         intern_c_string ("coding-category-ccl"));
10440   ASET (Vcoding_category_table, coding_category_emacs_mule,
10441         intern_c_string ("coding-category-emacs-mule"));
10442   /* Followings are NOT target of code detection.  */
10443   ASET (Vcoding_category_table, coding_category_raw_text,
10444         intern_c_string ("coding-category-raw-text"));
10445   ASET (Vcoding_category_table, coding_category_undecided,
10446         intern_c_string ("coding-category-undecided"));
10447
10448   DEFSYM (Qinsufficient_source, "insufficient-source");
10449   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10450   DEFSYM (Qinvalid_source, "invalid-source");
10451   DEFSYM (Qinterrupted, "interrupted");
10452   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10453   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10454
10455   defsubr (&Scoding_system_p);
10456   defsubr (&Sread_coding_system);
10457   defsubr (&Sread_non_nil_coding_system);
10458   defsubr (&Scheck_coding_system);
10459   defsubr (&Sdetect_coding_region);
10460   defsubr (&Sdetect_coding_string);
10461   defsubr (&Sfind_coding_systems_region_internal);
10462   defsubr (&Sunencodable_char_position);
10463   defsubr (&Scheck_coding_systems_region);
10464   defsubr (&Sdecode_coding_region);
10465   defsubr (&Sencode_coding_region);
10466   defsubr (&Sdecode_coding_string);
10467   defsubr (&Sencode_coding_string);
10468   defsubr (&Sdecode_sjis_char);
10469   defsubr (&Sencode_sjis_char);
10470   defsubr (&Sdecode_big5_char);
10471   defsubr (&Sencode_big5_char);
10472   defsubr (&Sset_terminal_coding_system_internal);
10473   defsubr (&Sset_safe_terminal_coding_system_internal);
10474   defsubr (&Sterminal_coding_system);
10475   defsubr (&Sset_keyboard_coding_system_internal);
10476   defsubr (&Skeyboard_coding_system);
10477   defsubr (&Sfind_operation_coding_system);
10478   defsubr (&Sset_coding_system_priority);
10479   defsubr (&Sdefine_coding_system_internal);
10480   defsubr (&Sdefine_coding_system_alias);
10481   defsubr (&Scoding_system_put);
10482   defsubr (&Scoding_system_base);
10483   defsubr (&Scoding_system_plist);
10484   defsubr (&Scoding_system_aliases);
10485   defsubr (&Scoding_system_eol_type);
10486   defsubr (&Scoding_system_priority_list);
10487
10488   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10489                doc: /* List of coding systems.
10490
10491 Do not alter the value of this variable manually.  This variable should be
10492 updated by the functions `define-coding-system' and
10493 `define-coding-system-alias'.  */);
10494   Vcoding_system_list = Qnil;
10495
10496   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10497                doc: /* Alist of coding system names.
10498 Each element is one element list of coding system name.
10499 This variable is given to `completing-read' as COLLECTION argument.
10500
10501 Do not alter the value of this variable manually.  This variable should be
10502 updated by the functions `make-coding-system' and
10503 `define-coding-system-alias'.  */);
10504   Vcoding_system_alist = Qnil;
10505
10506   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10507                doc: /* List of coding-categories (symbols) ordered by priority.
10508
10509 On detecting a coding system, Emacs tries code detection algorithms
10510 associated with each coding-category one by one in this order.  When
10511 one algorithm agrees with a byte sequence of source text, the coding
10512 system bound to the corresponding coding-category is selected.
10513
10514 Don't modify this variable directly, but use `set-coding-priority'.  */);
10515   {
10516     int i;
10517
10518     Vcoding_category_list = Qnil;
10519     for (i = coding_category_max - 1; i >= 0; i--)
10520       Vcoding_category_list
10521         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10522                  Vcoding_category_list);
10523   }
10524
10525   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10526                doc: /* Specify the coding system for read operations.
10527 It is useful to bind this variable with `let', but do not set it globally.
10528 If the value is a coding system, it is used for decoding on read operation.
10529 If not, an appropriate element is used from one of the coding system alists.
10530 There are three such tables: `file-coding-system-alist',
10531 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10532   Vcoding_system_for_read = Qnil;
10533
10534   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10535                doc: /* Specify the coding system for write operations.
10536 Programs bind this variable with `let', but you should not set it globally.
10537 If the value is a coding system, it is used for encoding of output,
10538 when writing it to a file and when sending it to a file or subprocess.
10539
10540 If this does not specify a coding system, an appropriate element
10541 is used from one of the coding system alists.
10542 There are three such tables: `file-coding-system-alist',
10543 `process-coding-system-alist', and `network-coding-system-alist'.
10544 For output to files, if the above procedure does not specify a coding system,
10545 the value of `buffer-file-coding-system' is used.  */);
10546   Vcoding_system_for_write = Qnil;
10547
10548   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10549                doc: /*
10550 Coding system used in the latest file or process I/O.  */);
10551   Vlast_coding_system_used = Qnil;
10552
10553   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10554                doc: /*
10555 Error status of the last code conversion.
10556
10557 When an error was detected in the last code conversion, this variable
10558 is set to one of the following symbols.
10559   `insufficient-source'
10560   `inconsistent-eol'
10561   `invalid-source'
10562   `interrupted'
10563   `insufficient-memory'
10564 When no error was detected, the value doesn't change.  So, to check
10565 the error status of a code conversion by this variable, you must
10566 explicitly set this variable to nil before performing code
10567 conversion.  */);
10568   Vlast_code_conversion_error = Qnil;
10569
10570   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10571                doc: /*
10572 *Non-nil means always inhibit code conversion of end-of-line format.
10573 See info node `Coding Systems' and info node `Text and Binary' concerning
10574 such conversion.  */);
10575   inhibit_eol_conversion = 0;
10576
10577   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10578                doc: /*
10579 Non-nil means process buffer inherits coding system of process output.
10580 Bind it to t if the process output is to be treated as if it were a file
10581 read from some filesystem.  */);
10582   inherit_process_coding_system = 0;
10583
10584   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10585                doc: /*
10586 Alist to decide a coding system to use for a file I/O operation.
10587 The format is ((PATTERN . VAL) ...),
10588 where PATTERN is a regular expression matching a file name,
10589 VAL is a coding system, a cons of coding systems, or a function symbol.
10590 If VAL is a coding system, it is used for both decoding and encoding
10591 the file contents.
10592 If VAL is a cons of coding systems, the car part is used for decoding,
10593 and the cdr part is used for encoding.
10594 If VAL is a function symbol, the function must return a coding system
10595 or a cons of coding systems which are used as above.  The function is
10596 called with an argument that is a list of the arguments with which
10597 `find-operation-coding-system' was called.  If the function can't decide
10598 a coding system, it can return `undecided' so that the normal
10599 code-detection is performed.
10600
10601 See also the function `find-operation-coding-system'
10602 and the variable `auto-coding-alist'.  */);
10603   Vfile_coding_system_alist = Qnil;
10604
10605   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10606                doc: /*
10607 Alist to decide a coding system to use for a process I/O operation.
10608 The format is ((PATTERN . VAL) ...),
10609 where PATTERN is a regular expression matching a program name,
10610 VAL is a coding system, a cons of coding systems, or a function symbol.
10611 If VAL is a coding system, it is used for both decoding what received
10612 from the program and encoding what sent to the program.
10613 If VAL is a cons of coding systems, the car part is used for decoding,
10614 and the cdr part is used for encoding.
10615 If VAL is a function symbol, the function must return a coding system
10616 or a cons of coding systems which are used as above.
10617
10618 See also the function `find-operation-coding-system'.  */);
10619   Vprocess_coding_system_alist = Qnil;
10620
10621   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10622                doc: /*
10623 Alist to decide a coding system to use for a network I/O operation.
10624 The format is ((PATTERN . VAL) ...),
10625 where PATTERN is a regular expression matching a network service name
10626 or is a port number to connect to,
10627 VAL is a coding system, a cons of coding systems, or a function symbol.
10628 If VAL is a coding system, it is used for both decoding what received
10629 from the network stream and encoding what sent to the network stream.
10630 If VAL is a cons of coding systems, the car part is used for decoding,
10631 and the cdr part is used for encoding.
10632 If VAL is a function symbol, the function must return a coding system
10633 or a cons of coding systems which are used as above.
10634
10635 See also the function `find-operation-coding-system'.  */);
10636   Vnetwork_coding_system_alist = Qnil;
10637
10638   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10639                doc: /* Coding system to use with system messages.
10640 Also used for decoding keyboard input on X Window system.  */);
10641   Vlocale_coding_system = Qnil;
10642
10643   /* The eol mnemonics are reset in startup.el system-dependently.  */
10644   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10645                doc: /*
10646 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10647   eol_mnemonic_unix = make_pure_c_string (":");
10648
10649   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10650                doc: /*
10651 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10652   eol_mnemonic_dos = make_pure_c_string ("\\");
10653
10654   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10655                doc: /*
10656 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10657   eol_mnemonic_mac = make_pure_c_string ("/");
10658
10659   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10660                doc: /*
10661 *String displayed in mode line when end-of-line format is not yet determined.  */);
10662   eol_mnemonic_undecided = make_pure_c_string (":");
10663
10664   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10665                doc: /*
10666 *Non-nil enables character translation while encoding and decoding.  */);
10667   Venable_character_translation = Qt;
10668
10669   DEFVAR_LISP ("standard-translation-table-for-decode",
10670                &Vstandard_translation_table_for_decode,
10671                doc: /* Table for translating characters while decoding.  */);
10672   Vstandard_translation_table_for_decode = Qnil;
10673
10674   DEFVAR_LISP ("standard-translation-table-for-encode",
10675                &Vstandard_translation_table_for_encode,
10676                doc: /* Table for translating characters while encoding.  */);
10677   Vstandard_translation_table_for_encode = Qnil;
10678
10679   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10680                doc: /* Alist of charsets vs revision numbers.
10681 While encoding, if a charset (car part of an element) is found,
10682 designate it with the escape sequence identifying revision (cdr part
10683 of the element).  */);
10684   Vcharset_revision_table = Qnil;
10685
10686   DEFVAR_LISP ("default-process-coding-system",
10687                &Vdefault_process_coding_system,
10688                doc: /* Cons of coding systems used for process I/O by default.
10689 The car part is used for decoding a process output,
10690 the cdr part is used for encoding a text to be sent to a process.  */);
10691   Vdefault_process_coding_system = Qnil;
10692
10693   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10694                doc: /*
10695 Table of extra Latin codes in the range 128..159 (inclusive).
10696 This is a vector of length 256.
10697 If Nth element is non-nil, the existence of code N in a file
10698 \(or output of subprocess) doesn't prevent it to be detected as
10699 a coding system of ISO 2022 variant which has a flag
10700 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10701 or reading output of a subprocess.
10702 Only 128th through 159th elements have a meaning.  */);
10703   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10704
10705   DEFVAR_LISP ("select-safe-coding-system-function",
10706                &Vselect_safe_coding_system_function,
10707                doc: /*
10708 Function to call to select safe coding system for encoding a text.
10709
10710 If set, this function is called to force a user to select a proper
10711 coding system which can encode the text in the case that a default
10712 coding system used in each operation can't encode the text.  The
10713 function should take care that the buffer is not modified while
10714 the coding system is being selected.
10715
10716 The default value is `select-safe-coding-system' (which see).  */);
10717   Vselect_safe_coding_system_function = Qnil;
10718
10719   DEFVAR_BOOL ("coding-system-require-warning",
10720                &coding_system_require_warning,
10721                doc: /* Internal use only.
10722 If non-nil, on writing a file, `select-safe-coding-system-function' is
10723 called even if `coding-system-for-write' is non-nil.  The command
10724 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10725   coding_system_require_warning = 0;
10726
10727
10728   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10729                &inhibit_iso_escape_detection,
10730                doc: /*
10731 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10732
10733 When Emacs reads text, it tries to detect how the text is encoded.
10734 This code detection is sensitive to escape sequences.  If Emacs sees
10735 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10736 of the ISO2022 encodings, and decodes text by the corresponding coding
10737 system (e.g. `iso-2022-7bit').
10738
10739 However, there may be a case that you want to read escape sequences in
10740 a file as is.  In such a case, you can set this variable to non-nil.
10741 Then the code detection will ignore any escape sequences, and no text is
10742 detected as encoded in some ISO-2022 encoding.  The result is that all
10743 escape sequences become visible in a buffer.
10744
10745 The default value is nil, and it is strongly recommended not to change
10746 it.  That is because many Emacs Lisp source files that contain
10747 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10748 in Emacs's distribution, and they won't be decoded correctly on
10749 reading if you suppress escape sequence detection.
10750
10751 The other way to read escape sequences in a file without decoding is
10752 to explicitly specify some coding system that doesn't use ISO-2022
10753 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10754   inhibit_iso_escape_detection = 0;
10755
10756   DEFVAR_BOOL ("inhibit-null-byte-detection",
10757                &inhibit_null_byte_detection,
10758                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10759 By default, Emacs treats it as binary data, and does not attempt to
10760 decode it.  The effect is as if you specified `no-conversion' for
10761 reading that text.
10762
10763 Set this to non-nil when a regular text happens to include null bytes.
10764 Examples are Index nodes of Info files and null-byte delimited output
10765 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10766 decode text as usual.  */);
10767   inhibit_null_byte_detection = 0;
10768
10769   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10770                doc: /* Char table for translating self-inserting characters.
10771 This is applied to the result of input methods, not their input.
10772 See also `keyboard-translate-table'.
10773
10774 Use of this variable for character code unification was rendered
10775 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10776 internal character representation.  */);
10777     Vtranslation_table_for_input = Qnil;
10778
10779   {
10780     Lisp_Object args[coding_arg_max];
10781     Lisp_Object plist[16];
10782     int i;
10783
10784     for (i = 0; i < coding_arg_max; i++)
10785       args[i] = Qnil;
10786
10787     plist[0] = intern_c_string (":name");
10788     plist[1] = args[coding_arg_name] = Qno_conversion;
10789     plist[2] = intern_c_string (":mnemonic");
10790     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10791     plist[4] = intern_c_string (":coding-type");
10792     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10793     plist[6] = intern_c_string (":ascii-compatible-p");
10794     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10795     plist[8] = intern_c_string (":default-char");
10796     plist[9] = args[coding_arg_default_char] = make_number (0);
10797     plist[10] = intern_c_string (":for-unibyte");
10798     plist[11] = args[coding_arg_for_unibyte] = Qt;
10799     plist[12] = intern_c_string (":docstring");
10800     plist[13] = make_pure_c_string ("Do no conversion.\n\
10801 \n\
10802 When you visit a file with this coding, the file is read into a\n\
10803 unibyte buffer as is, thus each byte of a file is treated as a\n\
10804 character.");
10805     plist[14] = intern_c_string (":eol-type");
10806     plist[15] = args[coding_arg_eol_type] = Qunix;
10807     args[coding_arg_plist] = Flist (16, plist);
10808     Fdefine_coding_system_internal (coding_arg_max, args);
10809
10810     plist[1] = args[coding_arg_name] = Qundecided;
10811     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10812     plist[5] = args[coding_arg_coding_type] = Qundecided;
10813     /* This is already set.
10814        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10815     plist[8] = intern_c_string (":charset-list");
10816     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10817     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10818     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10819     plist[15] = args[coding_arg_eol_type] = Qnil;
10820     args[coding_arg_plist] = Flist (16, plist);
10821     Fdefine_coding_system_internal (coding_arg_max, args);
10822   }
10823
10824   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10825
10826   {
10827     int i;
10828
10829     for (i = 0; i < coding_category_max; i++)
10830       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10831   }
10832 #if defined (DOS_NT)
10833   system_eol_type = Qdos;
10834 #else
10835   system_eol_type = Qunix;
10836 #endif
10837   staticpro (&system_eol_type);
10838 }
10839
10840 char *
10841 emacs_strerror (int error_number)
10842 {
10843   char *str;
10844
10845   synchronize_system_messages_locale ();
10846   str = strerror (error_number);
10847
10848   if (! NILP (Vlocale_coding_system))
10849     {
10850       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10851                                                       Vlocale_coding_system,
10852                                                       0);
10853       str = (char *) SDATA (dec);
10854     }
10855
10856   return str;
10857 }
10858
10859 #endif /* emacs */
10860
10861 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10862    (do not change this comment) */