src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009, 2010
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from coding system symbol to attributes vector is done by looking
  63   up the symbol in Vcoding_system_hash_table.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we setup a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source, *src_base;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;
 166   ...;
 167
 168   while (1)
 169     {
 170       src_base = src;   /* ONE_MORE_BYTE compares SRC_BASE with SRC.  */
 171       /* Get one byte; jump to no_more_source: if the source is exhausted.  */
 172       ONE_MORE_BYTE (c);
 173
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (__C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source was exhausted without finding an invalid byte.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size;
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source text, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf >= charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that source area and destination area are
 258   overlapped, which means that we can produce an encoded text until it
 259   reaches the head of the not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = coding->charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292 #include <setjmp.h>
 293
 294 #include "lisp.h"
 295 #include "buffer.h"
 296 #include "character.h"
 297 #include "charset.h"
 298 #include "ccl.h"
 299 #include "composite.h"
 300 #include "coding.h"
 301 #include "window.h"
 302 #include "frame.h"
 303 #include "termhooks.h"
 304
 305 Lisp_Object Vcoding_system_hash_table;
 306
 307 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 308 Lisp_Object Qunix, Qdos;
 309 extern Lisp_Object Qmac;        /* frame.c */
 310 Lisp_Object Qbuffer_file_coding_system;
 311 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 312 Lisp_Object Qdefault_char;
 313 Lisp_Object Qno_conversion, Qundecided;
 314 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 315 Lisp_Object Qbig, Qlittle;
 316 Lisp_Object Qcoding_system_history;
 317 Lisp_Object Qvalid_codes;
 318 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 319 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 320 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 321 Lisp_Object QCascii_compatible_p;
 322
 323 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 324 Lisp_Object Qcall_process, Qcall_process_region;
 325 Lisp_Object Qstart_process, Qopen_network_stream;
 326 Lisp_Object Qtarget_idx;
 327
 328 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 329 Lisp_Object Qinterrupted, Qinsufficient_memory;
 330
 331 extern Lisp_Object Qcompletion_ignore_case;
 332
 333 /* If a symbol has this property, evaluate the value to define the
 334    symbol as a coding system.  */
 335 static Lisp_Object Qcoding_system_define_form;
 336
 337 int coding_system_require_warning;
 338
 339 Lisp_Object Vselect_safe_coding_system_function;
 340
 341 /* Mnemonic string for each format of end-of-line.  */
 342 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 343 /* Mnemonic string to indicate format of end-of-line is not yet
 344    decided.  */
 345 Lisp_Object eol_mnemonic_undecided;
 346
 347 /* Format of end-of-line decided by system.  This is Qunix on
 348    Unix and Mac, Qdos on DOS/Windows.
 349    This has an effect only for external encoding (i.e. for output to
 350    file and process), not for in-buffer or Lisp string encoding.  */
 351 static Lisp_Object system_eol_type;
 352
 353 #ifdef emacs
 354
 355 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 356
 357 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 358
 359 /* Coding system emacs-mule and raw-text are for converting only
 360    end-of-line format.  */
 361 Lisp_Object Qemacs_mule, Qraw_text;
 362 Lisp_Object Qutf_8_emacs;
 363
 364 /* Coding-systems are handed between Emacs Lisp programs and C internal
 365    routines by the following three variables.  */
 366 /* Coding-system for reading files and receiving data from process.  */
 367 Lisp_Object Vcoding_system_for_read;
 368 /* Coding-system for writing files and sending data to process.  */
 369 Lisp_Object Vcoding_system_for_write;
 370 /* Coding-system actually used in the latest I/O.  */
 371 Lisp_Object Vlast_coding_system_used;
 372 /* Set to non-nil when an error is detected while code conversion.  */
 373 Lisp_Object Vlast_code_conversion_error;
 374 /* A vector of length 256 which contains information about special
 375    Latin codes (especially for dealing with Microsoft codes).  */
 376 Lisp_Object Vlatin_extra_code_table;
 377
 378 /* Flag to inhibit code conversion of end-of-line format.  */
 379 int inhibit_eol_conversion;
 380
 381 /* Flag to inhibit ISO2022 escape sequence detection.  */
 382 int inhibit_iso_escape_detection;
 383
 384 /* Flag to inhibit detection of binary files through null bytes.  */
 385 int inhibit_null_byte_detection;
 386
 387 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 388 int inherit_process_coding_system;
 389
 390 /* Coding system to be used to encode text for terminal display when
 391    terminal coding system is nil.  */
 392 struct coding_system safe_terminal_coding;
 393
 394 Lisp_Object Vfile_coding_system_alist;
 395 Lisp_Object Vprocess_coding_system_alist;
 396 Lisp_Object Vnetwork_coding_system_alist;
 397
 398 Lisp_Object Vlocale_coding_system;
 399
 400 #endif /* emacs */
 401
 402 /* Flag to tell if we look up translation table on character code
 403    conversion.  */
 404 Lisp_Object Venable_character_translation;
 405 /* Standard translation table to look up on decoding (reading).  */
 406 Lisp_Object Vstandard_translation_table_for_decode;
 407 /* Standard translation table to look up on encoding (writing).  */
 408 Lisp_Object Vstandard_translation_table_for_encode;
 409
 410 Lisp_Object Qtranslation_table;
 411 Lisp_Object Qtranslation_table_id;
 412 Lisp_Object Qtranslation_table_for_decode;
 413 Lisp_Object Qtranslation_table_for_encode;
 414
 415 /* Alist of charsets vs revision number.  */
 416 static Lisp_Object Vcharset_revision_table;
 417
 418 /* Default coding systems used for process I/O.  */
 419 Lisp_Object Vdefault_process_coding_system;
 420
 421 /* Char table for translating Quail and self-inserting input.  */
 422 Lisp_Object Vtranslation_table_for_input;
 423
 424 /* Two special coding systems.  */
 425 Lisp_Object Vsjis_coding_system;
 426 Lisp_Object Vbig5_coding_system;
 427
 428 /* ISO2022 section */
 429
 430 #define CODING_ISO_INITIAL(coding, reg)                 \
 431   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 432                      coding_attr_iso_initial),          \
 433                reg)))
 434
 435
 436 #define CODING_ISO_REQUEST(coding, charset_id)          \
 437   (((charset_id) <= (coding)->max_charset_id            \
 438     ? ((coding)->safe_charsets[charset_id] != 255       \
 439        ? (coding)->safe_charsets[charset_id]            \
 440        : -1)                                            \
 441     : -1))
 442
 443
 444 #define CODING_ISO_FLAGS(coding)        \
 445   ((coding)->spec.iso_2022.flags)
 446 #define CODING_ISO_DESIGNATION(coding, reg)     \
 447   ((coding)->spec.iso_2022.current_designation[reg])
 448 #define CODING_ISO_INVOCATION(coding, plane)    \
 449   ((coding)->spec.iso_2022.current_invocation[plane])
 450 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 451   ((coding)->spec.iso_2022.single_shifting)
 452 #define CODING_ISO_BOL(coding)  \
 453   ((coding)->spec.iso_2022.bol)
 454 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 455   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 456 #define CODING_ISO_CMP_STATUS(coding)   \
 457   (&(coding)->spec.iso_2022.cmp_status)
 458 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 459   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 460 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 461   ((coding)->spec.iso_2022.embedded_utf_8)
 462
 463 /* Control characters of ISO2022.  */
 464                         /* code */      /* function */
 465 #define ISO_CODE_LF     0x0A            /* line-feed */
 466 #define ISO_CODE_CR     0x0D            /* carriage-return */
 467 #define ISO_CODE_SO     0x0E            /* shift-out */
 468 #define ISO_CODE_SI     0x0F            /* shift-in */
 469 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 470 #define ISO_CODE_ESC    0x1B            /* escape */
 471 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 472 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 473 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 474
 475 /* Every 1-byte code of ISO2022 is classified into one of the
 476    following classes.  */
 477 enum iso_code_class_type
 478   {
 479     ISO_control_0,              /* Control codes in the range
 480                                    0x00..0x1F and 0x7F, except for the
 481                                    following 4 codes.  */
 482     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 483     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 484     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 485     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 486     ISO_control_1,              /* Control codes in the range
 487                                    0x80..0x9F, except for the
 488                                    following 3 codes.  */
 489     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 490     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 491     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 492     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 493     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 494     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 495     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 496   };
 497
 498 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
 499     `iso-flags' attribute of an iso2022 coding system.  */
 500
 501 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 502    instead of the correct short-form sequence (e.g. ESC $ A).  */
 503 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 504
 505 /* If set, reset graphic planes and registers at end-of-line to the
 506    initial state.  */
 507 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 508
 509 /* If set, reset graphic planes and registers before any control
 510    characters to the initial state.  */
 511 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 512
 513 /* If set, encode by 7-bit environment.  */
 514 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 515
 516 /* If set, use locking-shift function.  */
 517 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 518
 519 /* If set, use single-shift function.  Overwrite
 520    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 521 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 522
 523 /* If set, use designation escape sequence.  */
 524 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 525
 526 /* If set, produce revision number sequence.  */
 527 #define CODING_ISO_FLAG_REVISION        0x0080
 528
 529 /* If set, produce ISO6429's direction specifying sequence.  */
 530 #define CODING_ISO_FLAG_DIRECTION       0x0100
 531
 532 /* If set, assume designation states are reset at beginning of line on
 533    output.  */
 534 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 535
 536 /* If set, designation sequence should be placed at beginning of line
 537    on output.  */
 538 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 539
 540 /* If set, do not encode unsafe characters on output.  */
 541 #define CODING_ISO_FLAG_SAFE            0x0800
 542
 543 /* If set, extra latin codes (128..159) are accepted as a valid code
 544    on input.  */
 545 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 546
 547 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 548
 549 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 550
 551 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 552
 553 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 554
 555 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 556
 557 /* A character to be produced on output if encoding of the original
 558    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 559 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 560
 561 /* UTF-8 section */
 562 #define CODING_UTF_8_BOM(coding)        \
 563   ((coding)->spec.utf_8_bom)
 564
 565 /* UTF-16 section */
 566 #define CODING_UTF_16_BOM(coding)       \
 567   ((coding)->spec.utf_16.bom)
 568
 569 #define CODING_UTF_16_ENDIAN(coding)    \
 570   ((coding)->spec.utf_16.endian)
 571
 572 #define CODING_UTF_16_SURROGATE(coding) \
 573   ((coding)->spec.utf_16.surrogate)
 574
 575
 576 /* CCL section */
 577 #define CODING_CCL_DECODER(coding)      \
 578   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 579 #define CODING_CCL_ENCODER(coding)      \
 580   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 581 #define CODING_CCL_VALIDS(coding)                                          \
 582   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 583
 584 /* Index for each coding category in `coding_categories'.  */
 585
 586 enum coding_category
 587   {
 588     coding_category_iso_7,
 589     coding_category_iso_7_tight,
 590     coding_category_iso_8_1,
 591     coding_category_iso_8_2,
 592     coding_category_iso_7_else,
 593     coding_category_iso_8_else,
 594     coding_category_utf_8_auto,
 595     coding_category_utf_8_nosig,
 596     coding_category_utf_8_sig,
 597     coding_category_utf_16_auto,
 598     coding_category_utf_16_be,
 599     coding_category_utf_16_le,
 600     coding_category_utf_16_be_nosig,
 601     coding_category_utf_16_le_nosig,
 602     coding_category_charset,
 603     coding_category_sjis,
 604     coding_category_big5,
 605     coding_category_ccl,
 606     coding_category_emacs_mule,
 607     /* All above are targets of code detection.  */
 608     coding_category_raw_text,
 609     coding_category_undecided,
 610     coding_category_max         /* Number of categories (array size).  */
 611   };
 612
 613 /* Definitions of flag bits used in detect_coding_XXXX.  */
 614 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 615 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 616 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 617 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 618 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 619 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 620 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 621 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 622 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 623 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 624 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 625 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 626 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 627 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 628 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 629 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 630 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 631 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 632 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 633 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 634
 635 /* This value is returned if detect_coding_mask () finds nothing other
 636    than ASCII characters.  */
 637 #define CATEGORY_MASK_ANY               \
 638   (CATEGORY_MASK_ISO_7                  \
 639    | CATEGORY_MASK_ISO_7_TIGHT          \
 640    | CATEGORY_MASK_ISO_8_1              \
 641    | CATEGORY_MASK_ISO_8_2              \
 642    | CATEGORY_MASK_ISO_7_ELSE           \
 643    | CATEGORY_MASK_ISO_8_ELSE           \
 644    | CATEGORY_MASK_UTF_8_AUTO           \
 645    | CATEGORY_MASK_UTF_8_NOSIG          \
 646    | CATEGORY_MASK_UTF_8_SIG            \
 647    | CATEGORY_MASK_UTF_16_AUTO          \
 648    | CATEGORY_MASK_UTF_16_BE            \
 649    | CATEGORY_MASK_UTF_16_LE            \
 650    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 651    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 652    | CATEGORY_MASK_CHARSET              \
 653    | CATEGORY_MASK_SJIS                 \
 654    | CATEGORY_MASK_BIG5                 \
 655    | CATEGORY_MASK_CCL                  \
 656    | CATEGORY_MASK_EMACS_MULE)
 657
 658
 659 #define CATEGORY_MASK_ISO_7BIT \
 660   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 661
 662 #define CATEGORY_MASK_ISO_8BIT \
 663   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 664
 665 #define CATEGORY_MASK_ISO_ELSE \
 666   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 667
 668 #define CATEGORY_MASK_ISO_ESCAPE        \
 669   (CATEGORY_MASK_ISO_7                  \
 670    | CATEGORY_MASK_ISO_7_TIGHT          \
 671    | CATEGORY_MASK_ISO_7_ELSE           \
 672    | CATEGORY_MASK_ISO_8_ELSE)
 673
 674 #define CATEGORY_MASK_ISO       \
 675   (  CATEGORY_MASK_ISO_7BIT     \
 676      | CATEGORY_MASK_ISO_8BIT   \
 677      | CATEGORY_MASK_ISO_ELSE)
 678
 679 #define CATEGORY_MASK_UTF_16            \
 680   (CATEGORY_MASK_UTF_16_AUTO            \
 681    | CATEGORY_MASK_UTF_16_BE            \
 682    | CATEGORY_MASK_UTF_16_LE            \
 683    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 684    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 685
 686 #define CATEGORY_MASK_UTF_8     \
 687   (CATEGORY_MASK_UTF_8_AUTO     \
 688    | CATEGORY_MASK_UTF_8_NOSIG  \
 689    | CATEGORY_MASK_UTF_8_SIG)
 690
 691 /* List of symbols `coding-category-xxx' ordered by priority.  This
 692    variable is exposed to Emacs Lisp.  */
 693 static Lisp_Object Vcoding_category_list;
 694
 695 /* Table of coding categories (Lisp symbols).  This variable is for
 696    internal use only.  */
 697 static Lisp_Object Vcoding_category_table;
 698
 699 /* Table of coding-categories ordered by priority.  */
 700 static enum coding_category coding_priorities[coding_category_max];
 701
 702 /* Nth element is a coding context for the coding system bound to the
 703    Nth coding category.  */
 704 static struct coding_system coding_categories[coding_category_max];
 705
 706 /*** Commonly used macros and functions ***/
 707
 708 #ifndef min
 709 #define min(a, b) ((a) < (b) ? (a) : (b))
 710 #endif
 711 #ifndef max
 712 #define max(a, b) ((a) > (b) ? (a) : (b))
 713 #endif
 714
 715 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 716   do {                                                  \
 717     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 718     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 719   } while (0)
 720
 721
 722 /* Safely get one byte from the source text pointed by SRC which ends
 723    at SRC_END, and set C to that byte.  If there are not enough bytes
 724    in the source, it jumps to `no_more_source'.  If multibytep is
 725    nonzero, and a multibyte character is found at SRC, set C to the
 726    negative value of the character code.  The caller should declare
 727    and set these variables appropriately in advance:
 728         src, src_base, src_end, multibytep, consumed_chars, coding */
 729
 730 #define ONE_MORE_BYTE(c)                                \
 731   do {                                                  \
 732     if (src == src_end)       /* source exhausted */    \
 733       {                                                 \
 734         if (src_base < src)   /* bytes read since SRC_BASE */ \
 735           record_conversion_result                      \
 736             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 737         goto no_more_source;                            \
 738       }                                                 \
 739     c = *src++;                                         \
 740     if (multibytep && (c & 0x80))                       \
 741       {                                                 \
 742         if ((c & 0xFE) == 0xC0) /* lead byte 0xC0 or 0xC1 */ \
 743           c = ((c & 1) << 6) | *src++; /* merge with next byte */ \
 744         else                                            \
 745           {                                             \
 746             src--;            /* back to the lead byte */ \
 747             c = - string_char (src, &src, NULL);        \
 748             record_conversion_result                    \
 749               (coding, CODING_RESULT_INVALID_SRC);      \
 750           }                                             \
 751       }                                                 \
 752     consumed_chars++;         /* one per invocation */  \
 753   } while (0)
 754
 755 /* Safely get two bytes from the source text pointed by SRC which ends
 756    at SRC_END, and set C1 and C2 to those bytes while skipping the
 757    heading multibyte characters.  If there are not enough bytes in the
 758    source, it jumps to `no_more_source'.  If multibytep is nonzero and
 759    a multibyte character is found for C2, set C2 to -1 (the character
 760    code itself is not computed).  The caller should declare and set these
 761    variables appropriately in advance:
 762         src, src_end, multibytep
 763    It is intended that this macro is used in detect_coding_utf_16.  */
 764
 765 #define TWO_MORE_BYTES(c1, c2)                          \
 766   do {                                                  \
 767     do {                                                \
 768       if (src == src_end)                               \
 769         goto no_more_source;                            \
 770       c1 = *src++;                                      \
 771       if (multibytep && (c1 & 0x80))                    \
 772         {                                               \
 773           if ((c1 & 0xFE) == 0xC0) /* lead byte 0xC0 or 0xC1 */ \
 774             c1 = ((c1 & 1) << 6) | *src++;              \
 775           else                                          \
 776             {                                           \
 777               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 778               c1 = -1;        /* makes the inner loop retry */ \
 779             }                                           \
 780         }                                               \
 781     } while (c1 < 0);                                   \
 782     if (src == src_end)                                 \
 783       goto no_more_source;                              \
 784     c2 = *src++;                                        \
 785     if (multibytep && (c2 & 0x80))                      \
 786       {                                                 \
 787         if ((c2 & 0xFE) == 0xC0) /* lead byte 0xC0 or 0xC1 */ \
 788           c2 = ((c2 & 1) << 6) | *src++;                \
 789         else                                            \
 790           c2 = -1;            /* SRC is not advanced further here */ \
 791       }                                                 \
 792   } while (0)
 793
 794
     /* Fetch one byte from the source text pointed by SRC into C and
        advance SRC, without comparing SRC against the end of the source
        first.  If multibytep is nonzero and the byte has the 0x80 bit
        set, then either a byte 0xC0 or 0xC1 is merged with the following
        byte into a single value, or SRC is backed up and C is set to the
        negative of the value returned by string_char (which is handed
        &src), and CODING_RESULT_INVALID_SRC is recorded.  CONSUMED_CHARS
        is incremented once in every case.  The caller should declare and
        set these variables appropriately in advance:
             src, multibytep, coding, consumed_chars  */
 795 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 796   do {                                                  \
 797     c = *src++;                                         \
 798     if (multibytep && (c & 0x80))                       \
 799       {                                                 \
 800         if ((c & 0xFE) == 0xC0)                         \
 801           c = ((c & 1) << 6) | *src++;                  \
 802         else                                            \
 803           {                                             \
 804             src--;                                      \
 805             c = - string_char (src, &src, NULL);        \
 806             record_conversion_result                    \
 807               (coding, CODING_RESULT_INVALID_SRC);      \
 808           }                                             \
 809       }                                                 \
 810     consumed_chars++;                                   \
 811   } while (0)
 812
 813
 814 /* Store a byte C in the place pointed by DST and increment DST to the
 815    next free point, and increment PRODUCED_CHARS.  The caller should
 816    assure that C is 0..127, and declare and set the variables `dst'
 817    and `produced_chars' appropriately in advance.
 818 */
 819
 820
 821 #define EMIT_ONE_ASCII_BYTE(c)  \
 822   do {                          \
 823     produced_chars++;           \
 824     *dst++ = (c);               \
 825   } while (0)
 826
 827
 828 /* Like EMIT_ONE_ASCII_BYTE but store two bytes; C1 and C2.  */
 829
 830 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 831   do {                                  \
 832     produced_chars += 2;                \
 833     *dst++ = (c1), *dst++ = (c2);       \
 834   } while (0)
 835
 836
 837 /* Store a byte C in the place pointed by DST and increment DST to the
 838    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 839    nonzero, store in an appropriate multibyte form.  The caller should
 840    declare and set the variables `dst', `multibytep' and
 841    `produced_chars' appropriately in advance.  */
     /* In the multibyte case a value of 0x80 or above is first converted
        with BYTE8_TO_CHAR, and the result is stored with
        CHAR_STRING_ADVANCE.  */
 842
 843 #define EMIT_ONE_BYTE(c)                \
 844   do {                                  \
 845     produced_chars++;                   \
 846     if (multibytep)                     \
 847       {                                 \
 848         int ch = (c);                   \
 849         if (ch >= 0x80)                 \
 850           ch = BYTE8_TO_CHAR (ch);      \
 851         CHAR_STRING_ADVANCE (ch, dst);  \
 852       }                                 \
 853     else                                \
 854       *dst++ = (c);                     \
 855   } while (0)
 856
 857
 858 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  PRODUCED_CHARS
        is incremented by 2 before either byte is stored, and C1 is stored
        before C2.  */
 859
 860 #define EMIT_TWO_BYTES(c1, c2)          \
 861   do {                                  \
 862     produced_chars += 2;                \
 863     if (multibytep)                     \
 864       {                                 \
 865         int ch;                         \
 866                                         \
 867         ch = (c1);                      \
 868         if (ch >= 0x80)                 \
 869           ch = BYTE8_TO_CHAR (ch);      \
 870         CHAR_STRING_ADVANCE (ch, dst);  \
 871         ch = (c2);                      \
 872         if (ch >= 0x80)                 \
 873           ch = BYTE8_TO_CHAR (ch);      \
 874         CHAR_STRING_ADVANCE (ch, dst);  \
 875       }                                 \
 876     else                                \
 877       {                                 \
 878         *dst++ = (c1);                  \
 879         *dst++ = (c2);                  \
 880       }                                 \
 881   } while (0)
 882
 883
     /* Emit the three bytes C1, C2 and C3 in that order, by way of
        EMIT_ONE_BYTE followed by EMIT_TWO_BYTES.  */
 884 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 885   do {                                  \
 886     EMIT_ONE_BYTE (c1);                 \
 887     EMIT_TWO_BYTES (c2, c3);            \
 888   } while (0)
 889
 890
     /* Emit the four bytes C1, C2, C3 and C4 in that order, by way of two
        uses of EMIT_TWO_BYTES.  */
 891 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 892   do {                                          \
 893     EMIT_TWO_BYTES (c1, c2);                    \
 894     EMIT_TWO_BYTES (c3, c4);                    \
 895   } while (0)
 896
 897
 898 /* Prototypes for static functions.  The detect/decode/encode handlers
        of each kind of coding system come first, followed by the helpers
        for source/destination bookkeeping, end-of-line handling,
        translation tables, annotations and the conversion drivers.  */
 899 static void record_conversion_result P_ ((struct coding_system *coding,
 900                                           enum coding_result_code result));
 901 static int detect_coding_utf_8 P_ ((struct coding_system *,
 902                                     struct coding_detection_info *info));
 903 static void decode_coding_utf_8 P_ ((struct coding_system *));
 904 static int encode_coding_utf_8 P_ ((struct coding_system *));
 905
 906 static int detect_coding_utf_16 P_ ((struct coding_system *,
 907                                      struct coding_detection_info *info));
 908 static void decode_coding_utf_16 P_ ((struct coding_system *));
 909 static int encode_coding_utf_16 P_ ((struct coding_system *));
 910
 911 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 912                                        struct coding_detection_info *info));
 913 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 914 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 915
 916 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 917                                          struct coding_detection_info *info));
 918 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 919 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 920
 921 static int detect_coding_sjis P_ ((struct coding_system *,
 922                                    struct coding_detection_info *info));
 923 static void decode_coding_sjis P_ ((struct coding_system *));
 924 static int encode_coding_sjis P_ ((struct coding_system *));
 925
 926 static int detect_coding_big5 P_ ((struct coding_system *,
 927                                    struct coding_detection_info *info));
 928 static void decode_coding_big5 P_ ((struct coding_system *));
 929 static int encode_coding_big5 P_ ((struct coding_system *));
 930
 931 static int detect_coding_ccl P_ ((struct coding_system *,
 932                                   struct coding_detection_info *info));
 933 static void decode_coding_ccl P_ ((struct coding_system *));
 934 static int encode_coding_ccl P_ ((struct coding_system *));
 935
 936 static void decode_coding_raw_text P_ ((struct coding_system *));
 937 static int encode_coding_raw_text P_ ((struct coding_system *));
 938
 939 static void coding_set_source P_ ((struct coding_system *));
 940 static void coding_set_destination P_ ((struct coding_system *));
 941 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 942 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 943                                             EMACS_INT, EMACS_INT));
 944 static unsigned char *alloc_destination P_ ((struct coding_system *,
 945                                              EMACS_INT, unsigned char *));
 946 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 947 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 948                                                      int *, int *,
 949                                                      unsigned char *));
 950 static int detect_eol P_ ((const unsigned char *,
 951                            EMACS_INT, enum coding_category));
 952 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 953 static void decode_eol P_ ((struct coding_system *));
 954 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 955 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *));
 956 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 957 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 958                                         EMACS_INT));
 959 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 960 static int decode_coding P_ ((struct coding_system *));
 961 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 962                                                       struct coding_system *,
 963                                                       int *, EMACS_INT *));
 964 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 965                                                   struct coding_system *,
 966                                                   int *, EMACS_INT *));
 967 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 968 static int encode_coding P_ ((struct coding_system *));
 969 static Lisp_Object make_conversion_work_buffer P_ ((int));
 970 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 971 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 972 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 973
 974 static void
 975 record_conversion_result (struct coding_system *coding,
 976                           enum coding_result_code result)
 977 {
 978   coding->result = result;
 979   switch (result)
 980     {
 981     case CODING_RESULT_INSUFFICIENT_SRC:
 982       Vlast_code_conversion_error = Qinsufficient_source;
 983       break;
 984     case CODING_RESULT_INCONSISTENT_EOL:
 985       Vlast_code_conversion_error = Qinconsistent_eol;
 986       break;
 987     case CODING_RESULT_INVALID_SRC:
 988       Vlast_code_conversion_error = Qinvalid_source;
 989       break;
 990     case CODING_RESULT_INTERRUPT:
 991       Vlast_code_conversion_error = Qinterrupted;
 992       break;
 993     case CODING_RESULT_INSUFFICIENT_MEM:
 994       Vlast_code_conversion_error = Qinsufficient_memory;
 995       break;
 996     case CODING_RESULT_INSUFFICIENT_DST:
 997       /* Don't record this error in Vlast_code_conversion_error
 998          because it happens just temporarily and is resolved when the
 999          whole conversion is finished.  */
1000       break;
1001     case CODING_RESULT_SUCCESS:
1002       break;
1003     default:
1004       Vlast_code_conversion_error = intern ("Unknown error");
1005     }
1006 }
1007
/* This wrapper macro is used to preserve validity of pointers into
   buffer text across calls to decode_char, which could cause
   relocation of buffers if it loads a charset map, because loading a
   charset map allocates large structures.

   Decode CODE in CHARSET with DECODE_CHAR and store the result in C.
   If charset_map_loaded was set during the call, refresh
   CODING->source with coding_set_source and shift SRC, SRC_BASE and
   SRC_END by the distance the source moved.

   CODING is parenthesized wherever it is dereferenced, and the
   temporaries carry a trailing underscore so that they do not capture
   a caller's variable passed as an argument.  */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    (c) = DECODE_CHAR (charset, code);                                       \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *orig_source_ = (coding)->source;                \
        EMACS_INT reloc_offset_;                                             \
                                                                             \
        coding_set_source (coding);                                          \
        reloc_offset_ = (coding)->source - orig_source_;                     \
        (src) += reloc_offset_;                                              \
        (src_base) += reloc_offset_;                                         \
        (src_end) += reloc_offset_;                                          \
      }                                                                      \
  } while (0)
1028
1029
1030 /* If the room left at dst is not larger than BYTES (that is, if
1031    dst + BYTES >= dst_end), allocate more memory for
1032    coding->destination and update dst and dst_end.  We don't have
1033    to take care of coding->source which will be relocated.  It is
        handled by calling coding_set_source in encode_coding.  The amount
        requested from alloc_destination is BYTES plus the number of
        elements remaining between charbuf and charbuf_end.  The caller
        should declare and set these variables appropriately in advance:
             coding, dst, dst_end, charbuf, charbuf_end  */
1034
1035 #define ASSURE_DESTINATION(bytes)                               \
1036   do {                                                          \
1037     if (dst + (bytes) >= dst_end)                               \
1038       {                                                         \
1039         int more_bytes = charbuf_end - charbuf + (bytes);       \
1040                                                                 \
1041         dst = alloc_destination (coding, more_bytes, dst);      \
1042         dst_end = coding->destination + coding->dst_bytes;      \
1043       }                                                         \
1044   } while (0)
1045
1046
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   The number of bytes stored is chosen by comparing C against
   MAX_1_BYTE_CHAR ... MAX_5_BYTE_CHAR; anything larger is handed to
   BYTE8_STRING after subtracting 0x3FFF80.  C and P are evaluated
   more than once, so they must have no side effects.  Every use of C
   is parenthesized so that an arbitrary expression may be passed.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1076
1077
1078 /* Return the character code of character whose multibyte form is at
1079    P, and advance P to the end of the multibyte form.  This is like
1080    STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.  */
     /* The length (1 to 5 bytes) is chosen from the bits 0x80, 0x20, 0x10
        and 0x08 of the first byte.  For a two-byte form whose first byte
        is below 0xC2, 0x3FFF80 is OR-ed into the result.  In the
        five-byte case the first byte contributes no bits to the result.
        P is evaluated several times, so it must have no side effects.  */
1081
1082 #define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
1083   (!((p)[0] & 0x80)                                             \
1084    ? *(p)++                                                     \
1085    : ! ((p)[0] & 0x20)                                          \
1086    ? ((p) += 2,                                                 \
1087       ((((p)[-2] & 0x1F) << 6)                                  \
1088        | ((p)[-1] & 0x3F)                                       \
1089        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
1090    : ! ((p)[0] & 0x10)                                          \
1091    ? ((p) += 3,                                                 \
1092       ((((p)[-3] & 0x0F) << 12)                                 \
1093        | (((p)[-2] & 0x3F) << 6)                                \
1094        | ((p)[-1] & 0x3F)))                                     \
1095    : ! ((p)[0] & 0x08)                                          \
1096    ? ((p) += 4,                                                 \
1097       ((((p)[-4] & 0xF) << 18)                                  \
1098        | (((p)[-3] & 0x3F) << 12)                               \
1099        | (((p)[-2] & 0x3F) << 6)                                \
1100        | ((p)[-1] & 0x3F)))                                     \
1101    : ((p) += 5,                                                 \
1102       ((((p)[-4] & 0x3F) << 18)                                 \
1103        | (((p)[-3] & 0x3F) << 12)                               \
1104        | (((p)[-2] & 0x3F) << 6)                                \
1105        | ((p)[-1] & 0x3F))))
1106
1107
1108 static void
1109 coding_set_source (coding)
1110      struct coding_system *coding;
1111 {
1112   if (BUFFERP (coding->src_object))
1113     {
1114       struct buffer *buf = XBUFFER (coding->src_object);
1115
1116       if (coding->src_pos < 0)
1117         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1118       else
1119         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1120     }
1121   else if (STRINGP (coding->src_object))
1122     {
1123       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1124     }
1125   else
1126     /* Otherwise, the source is C string and is never relocated
1127        automatically.  Thus we don't have to update anything.  */
1128     ;
1129 }
1130
1131 static void
1132 coding_set_destination (coding)
1133      struct coding_system *coding;
1134 {
1135   if (BUFFERP (coding->dst_object))
1136     {
1137       if (coding->src_pos < 0)
1138         {
1139           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1140           coding->dst_bytes = (GAP_END_ADDR
1141                                - (coding->src_bytes - coding->consumed)
1142                                - coding->destination);
1143         }
1144       else
1145         {
1146           /* We are sure that coding->dst_pos_byte is before the gap
1147              of the buffer. */
1148           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1149                                  + coding->dst_pos_byte - BEG_BYTE);
1150           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1151                                - coding->destination);
1152         }
1153     }
1154   else
1155     /* Otherwise, the destination is C string and is never relocated
1156        automatically.  Thus we don't have to update anything.  */
1157     ;
1158 }
1159
1160
1161 static void
1162 coding_alloc_by_realloc (coding, bytes)
1163      struct coding_system *coding;
1164      EMACS_INT bytes;
1165 {
1166   coding->destination = (unsigned char *) xrealloc (coding->destination,
1167                                                     coding->dst_bytes + bytes);
1168   coding->dst_bytes += bytes;
1169 }
1170
     /* Enlarge a buffer gap by BYTES with make_gap so that more
        destination room becomes available.  GAP_HEAD_USED is added to GPT
        and GPT_BYTE around the call when the source and destination
        objects are the same.  When they differ, the buffer
        CODING->dst_object is made current just for the call to make_gap
        and the previously current buffer is restored afterwards.  */
1171 static void
1172 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1173      struct coding_system *coding;
1174      EMACS_INT gap_head_used, bytes;
1175 {
1176   if (EQ (coding->src_object, coding->dst_object))
1177     {
1178       /* The gap may contain the produced data at the head and not-yet
1179          consumed data at the tail.  To preserve those data, we at
1180          first make the gap size to zero, then increase the gap
1181          size.  */
1182       EMACS_INT add = GAP_SIZE;
1183
           /* Temporarily count the whole old gap as buffer text: move the
              gap start past the produced data, zero the gap size and
              extend Z/ZV by the old gap size.  */
1184       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1185       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1186       make_gap (bytes);
           /* Undo the temporary accounting; the old gap size is added on
              top of whatever make_gap left in GAP_SIZE.  */
1187       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1188       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1189     }
1190   else
1191     {
1192       Lisp_Object this_buffer;
1193
1194       this_buffer = Fcurrent_buffer ();
1195       set_buffer_internal (XBUFFER (coding->dst_object));
1196       make_gap (bytes);
1197       set_buffer_internal (XBUFFER (this_buffer));
1198     }
1199 }
1200
1201
1202 static unsigned char *
1203 alloc_destination (coding, nbytes, dst)
1204      struct coding_system *coding;
1205      EMACS_INT nbytes;
1206      unsigned char *dst;
1207 {
1208   EMACS_INT offset = dst - coding->destination;
1209
1210   if (BUFFERP (coding->dst_object))
1211     {
1212       struct buffer *buf = XBUFFER (coding->dst_object);
1213
1214       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1215     }
1216   else
1217     coding_alloc_by_realloc (coding, nbytes);
1218   coding_set_destination (coding);
1219   dst = coding->destination + offset;
1220   return dst;
1221 }
1222
1223 /** Macros for annotations.  */
1224
1225 /* An annotation data is stored in the array coding->charbuf in this
1226    format:
1227      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1228    LENGTH is the number of elements in the annotation.
1229    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1230    NCHARS is the number of characters in the text annotated.
1231
1232    The format of the following elements depends on ANNOTATION_MASK.
1233
1234    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1235    follow:
1236      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1237
1238    NBYTES is the number of bytes specified in the header part of
1239    old-style emacs-mule encoding, or 0 for the other kind of
1240    composition.
1241
1242    METHOD is one of enum composition_method.
1243
1244    Optional COMPOSITION-COMPONENTS are characters and composition
1245    rules.
1246
1247    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1248    follows.
1249
1250    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1251    recover from an invalid annotation, and should be skipped by
1252    produce_annotation.  */
1253
/* Maximum length of the header of annotation data.  */
#define MAX_ANNOTATION_LENGTH 5

/* Store the three leading elements of an annotation, -LEN, MASK and
   NCHARS, at BUF, advance BUF past them, and set coding->annotated.
   The caller should declare and set the variable `coding'
   appropriately in advance.

   The expansion deliberately does not end with a semicolon, so that
   the macro followed by `;' is exactly one statement and may be used
   as the unbraced body of an `if' that has an `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1264
/* Store a composition annotation at BUF and advance BUF past it: the
   common header written by ADD_ANNOTATION_DATA (with length 5 and
   CODING_ANNOTATE_COMPOSITION_MASK), followed by NBYTES and METHOD.
   Every argument is parenthesized so that BUF may be an expression
   such as `*pp'.  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes);                                                    \
    *(buf)++ = (method);                                                    \
  } while (0)
1271
1272
/* Store a charset annotation at BUF and advance BUF past it: the
   common header written by ADD_ANNOTATION_DATA (with length 4 and
   CODING_ANNOTATE_CHARSET_MASK), followed by ID.  Every argument is
   parenthesized so that BUF may be an expression such as `*pp'.  */
#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1278
1279 \f
1280 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1281
1282
1283
1284 \f
1285 /*** 3. UTF-8 ***/
1286
1287 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1288    Check if a text is encoded in UTF-8.  If it is, return 1, else
1289    return 0.  */
1290
     /* Predicates classifying the octet C by its high-order bits: a
        single-octet character, a trailing (extra) octet, or the leading
        octet of a 2-, 3-, 4- or 5-octet sequence.  */
1291 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1292 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1293 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1294 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1295 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1296 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1297
     /* The byte order mark character, and the three octets that encode
        it in UTF-8.  */
1298 #define UTF_BOM 0xFEFF
1299 #define UTF_8_BOM_1 0xEF
1300 #define UTF_8_BOM_2 0xBB
1301 #define UTF_8_BOM_3 0xBF
1302
     /* Scan the source of CODING, starting after its leading ASCII part,
        and check that every non-ASCII octet belongs to a sequence of one
        leading octet and the matching number of extra octets.

        Return 0, with CATEGORY_MASK_UTF_8 added to DETECT_INFO->rejected,
        as soon as a malformed sequence is seen, or when the source ends
        in the middle of a sequence and CODING_MODE_LAST_BLOCK is set.
        Otherwise return 1 when the source is exhausted, after updating
        DETECT_INFO->found and DETECT_INFO->rejected according to whether
        a BOM and/or any multi-octet sequence was seen.  */
1303 static int
1304 detect_coding_utf_8 (coding, detect_info)
1305      struct coding_system *coding;
1306      struct coding_detection_info *detect_info;
1307 {
1308   const unsigned char *src = coding->source, *src_base;
1309   const unsigned char *src_end = coding->source + coding->src_bytes;
1310   int multibytep = coding->src_multibyte;
1311   int consumed_chars = 0;
1312   int bom_found = 0;
1313   int found = 0;
1314
1315   detect_info->checked |= CATEGORY_MASK_UTF_8;
1316   /* A coding system of this category is always ASCII compatible.  */
1317   src += coding->head_ascii;
1318
       /* The loop is left either by `break' (malformed sequence) or by
          ONE_MORE_BYTE jumping to `no_more_source'.  */
1319   while (1)
1320     {
1321       int c, c1, c2, c3, c4;
1322
1323       src_base = src;
1324       ONE_MORE_BYTE (c);
1325       if (c < 0 || UTF_8_1_OCTET_P (c))
1326         continue;
1327       ONE_MORE_BYTE (c1);
1328       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1329         break;
1330       if (UTF_8_2_OCTET_LEADING_P (c))
1331         {
1332           found = 1;
1333           continue;
1334         }
1335       ONE_MORE_BYTE (c2);
1336       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1337         break;
1338       if (UTF_8_3_OCTET_LEADING_P (c))
1339         {
1340           found = 1;
               /* A BOM only counts at the very beginning of the source.  */
1341           if (src_base == coding->source
1342               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1343             bom_found = 1;
1344           continue;
1345         }
1346       ONE_MORE_BYTE (c3);
1347       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1348         break;
1349       if (UTF_8_4_OCTET_LEADING_P (c))
1350         {
1351           found = 1;
1352           continue;
1353         }
1354       ONE_MORE_BYTE (c4);
1355       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1356         break;
1357       if (UTF_8_5_OCTET_LEADING_P (c))
1358         {
1359           found = 1;
1360           continue;
1361         }
1362       break;
1363     }
1364   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1365   return 0;
1366
1367  no_more_source:
       /* src_base < src means the source ended inside a sequence.  */
1368   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1369     {
1370       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1371       return 0;
1372     }
1373   if (bom_found)
1374     {
1375       /* The first character 0xFEFF doesn't necessarily mean a BOM.  */
1376       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1377     }
1378   else
1379     {
1380       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1381       if (found)
1382         detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1383     }
1384   return 1;
1385 }
1386
1387
     /* Decode the UTF-8 source of CODING, starting at
        CODING->source + CODING->consumed, and append one character code
        per decoded character to CODING->charbuf.

        A leading BOM is skipped unless CODING_UTF_8_BOM (coding) is
        utf_without_bom.  A byte that does not start a well-formed
        sequence (malformed, overlong, surrogate or out-of-range) is
        stored as a single character -- itself if ASCII, otherwise
        BYTE8_TO_CHAR of the byte -- and CODING->errors is incremented.

        Decoding stops when the charbuf is full or the source is
        exhausted.  On exit CODING->consumed_char, CODING->consumed and
        CODING->charbuf_used describe what has been completely decoded.  */
1388 static void
1389 decode_coding_utf_8 (coding)
1390      struct coding_system *coding;
1391 {
1392   const unsigned char *src = coding->source + coding->consumed;
1393   const unsigned char *src_end = coding->source + coding->src_bytes;
1394   const unsigned char *src_base;
1395   int *charbuf = coding->charbuf + coding->charbuf_used;
1396   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1397   int consumed_chars = 0, consumed_chars_base = 0;
1398   int multibytep = coding->src_multibyte;
1399   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1400   Lisp_Object attr, charset_list;
1401   int eol_crlf =
1402     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The byte fetched ahead after a CR when EOL_CRLF is set, or -1 if
          there is none pending.  */
1403   int byte_after_cr = -1;
1404
1405   CODING_GET_INFO (coding, attr, charset_list);
1406
       /* Probe for a BOM.  Unless the first three bytes are exactly
          EF BB BF, SRC is rewound to SRC_BASE so that they are decoded
          normally below.
          NOTE(review): CONSUMED_CHARS is incremented by ONE_MORE_BYTE but
          is not rewound together with SRC here -- confirm whether the
          resulting CODING->consumed_char is intended.  */
1407   if (bom != utf_without_bom)
1408     {
1409       int c1, c2, c3;
1410
1411       src_base = src;
1412       ONE_MORE_BYTE (c1);
1413       if (! UTF_8_3_OCTET_LEADING_P (c1))
1414         src = src_base;
1415       else
1416         {
1417           ONE_MORE_BYTE (c2);
1418           if (! UTF_8_EXTRA_OCTET_P (c2))
1419             src = src_base;
1420           else
1421             {
1422               ONE_MORE_BYTE (c3);
1423               if (! UTF_8_EXTRA_OCTET_P (c3))
1424                 src = src_base;
1425               else
1426                 {
1427                   if ((c1 != UTF_8_BOM_1)
1428                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1429                     src = src_base;
1430                   else
1431                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1432                 }
1433             }
1434         }
1435     }
       /* The BOM is looked for only once per coding system.
          NOTE(review): this unconditional assignment makes the one in the
          innermost branch above redundant.  */
1436   CODING_UTF_8_BOM (coding) = utf_without_bom;
1437
1438
1439
1440   while (1)
1441     {
1442       int c, c1, c2, c3, c4, c5;
1443
           /* Remember where this character starts so that an incomplete
              or invalid sequence can be rolled back.  */
1444       src_base = src;
1445       consumed_chars_base = consumed_chars;
1446
1447       if (charbuf >= charbuf_end)
1448         {
               /* Give back the byte that was fetched ahead after a CR.  */
1449           if (byte_after_cr >= 0)
1450             src_base--;
1451           break;
1452         }
1453
1454       if (byte_after_cr >= 0)
1455         c1 = byte_after_cr, byte_after_cr = -1;
1456       else
1457         ONE_MORE_BYTE (c1);
1458       if (c1 < 0)
1459         {
               /* ONE_MORE_BYTE stores the negated value for a multibyte
                  character found in the source.  */
1460           c = - c1;
1461         }
1462       else if (UTF_8_1_OCTET_P(c1))
1463         {
1464           if (eol_crlf && c1 == '\r')
1465             ONE_MORE_BYTE (byte_after_cr);
1466           c = c1;
1467         }
1468       else
1469         {
1470           ONE_MORE_BYTE (c2);
1471           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1472             goto invalid_code;
1473           if (UTF_8_2_OCTET_LEADING_P (c1))
1474             {
1475               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1476               /* Reject overlong sequences here and below.  Encoders
1477                  producing them are incorrect, they can be misleading,
1478                  and they mess up read/write invariance.  */
1479               if (c < 128)
1480                 goto invalid_code;
1481             }
1482           else
1483             {
1484               ONE_MORE_BYTE (c3);
1485               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1486                 goto invalid_code;
1487               if (UTF_8_3_OCTET_LEADING_P (c1))
1488                 {
1489                   c = (((c1 & 0xF) << 12)
1490                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1491                   if (c < 0x800
1492                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1493                     goto invalid_code;
1494                 }
1495               else
1496                 {
1497                   ONE_MORE_BYTE (c4);
1498                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1499                     goto invalid_code;
1500                   if (UTF_8_4_OCTET_LEADING_P (c1))
1501                     {
1502                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1503                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1504                     if (c < 0x10000)
1505                       goto invalid_code;
1506                     }
1507                   else
1508                     {
1509                       ONE_MORE_BYTE (c5);
1510                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1511                         goto invalid_code;
1512                       if (UTF_8_5_OCTET_LEADING_P (c1))
1513                         {
1514                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1515                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1516                                | (c5 & 0x3F));
1517                           if ((c > MAX_CHAR) || (c < 0x200000))
1518                             goto invalid_code;
1519                         }
1520                       else
1521                         goto invalid_code;
1522                     }
1523                 }
1524             }
1525         }
1526
1527       *charbuf++ = c;
1528       continue;
1529
1530     invalid_code:
           /* Rewind to the start of the bad sequence and consume just its
              first byte.  */
1531       src = src_base;
1532       consumed_chars = consumed_chars_base;
1533       ONE_MORE_BYTE (c);
1534       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1535       coding->errors++;
1536     }
1537
1538  no_more_source:
       /* Only what precedes SRC_BASE counts as consumed, so a sequence
          cut short by the end of the source is left for the next call.  */
1539   coding->consumed_char += consumed_chars_base;
1540   coding->consumed = src_base - coding->source;
1541   coding->charbuf_used = charbuf - coding->charbuf;
1542 }
1543
1544
1545 static int
1546 encode_coding_utf_8 (coding)
1547      struct coding_system *coding;
1548 {
1549   int multibytep = coding->dst_multibyte;
1550   int *charbuf = coding->charbuf;
1551   int *charbuf_end = charbuf + coding->charbuf_used;
1552   unsigned char *dst = coding->destination + coding->produced;
1553   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1554   int produced_chars = 0;
1555   int c;
1556
1557   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1558     {
1559       ASSURE_DESTINATION (3);
1560       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1561       CODING_UTF_8_BOM (coding) = utf_without_bom;
1562     }
1563
1564   if (multibytep)
1565     {
1566       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1567
1568       while (charbuf < charbuf_end)
1569         {
1570           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1571
1572           ASSURE_DESTINATION (safe_room);
1573           c = *charbuf++;
1574           if (CHAR_BYTE8_P (c))
1575             {
1576               c = CHAR_TO_BYTE8 (c);
1577               EMIT_ONE_BYTE (c);
1578             }
1579           else
1580             {
1581               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1582               for (p = str; p < pend; p++)
1583                 EMIT_ONE_BYTE (*p);
1584             }
1585         }
1586     }
1587   else
1588     {
1589       int safe_room = MAX_MULTIBYTE_LENGTH;
1590
1591       while (charbuf < charbuf_end)
1592         {
1593           ASSURE_DESTINATION (safe_room);
1594           c = *charbuf++;
1595           if (CHAR_BYTE8_P (c))
1596             *dst++ = CHAR_TO_BYTE8 (c);
1597           else
1598             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1599           produced_chars++;
1600         }
1601     }
1602   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1603   coding->produced_char += produced_chars;
1604   coding->produced = dst - coding->destination;
1605   return 0;
1606 }
1607
1608
1609 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1610    Check if a text is encoded in one of UTF-16 based coding systems.
1611    If it is, return 1, else return 0.  */
1612
/* Nonzero if (VAL & 0xFC00) is 0xD800, i.e. a 16-bit VAL lies in the
   high-surrogate range 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  ((((val) & 0xFC00) ^ 0xD800) == 0)

/* Nonzero if (VAL & 0xFC00) is 0xDC00, i.e. a 16-bit VAL lies in the
   low-surrogate range 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  ((((val) & 0xFC00) ^ 0xDC00) == 0)

/* Nonzero if VAL is a low surrogate or one of 0xFFFE and 0xFFFF.  */
#define UTF_16_INVALID_P(val)           \
  (UTF_16_LOW_SURROGATE_P (val)         \
   || (val) == 0xFFFE                   \
   || (val) == 0xFFFF)
1623
1624
1625 static int
1626 detect_coding_utf_16 (coding, detect_info)
1627      struct coding_system *coding;
1628      struct coding_detection_info *detect_info;
1629 {
1630   const unsigned char *src = coding->source, *src_base = src;
1631   const unsigned char *src_end = coding->source + coding->src_bytes;
1632   int multibytep = coding->src_multibyte;
1633   int consumed_chars = 0;
1634   int c1, c2;
1635
1636   detect_info->checked |= CATEGORY_MASK_UTF_16;
1637   if (coding->mode & CODING_MODE_LAST_BLOCK
1638       && (coding->src_chars & 1))
1639     {
1640       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1641       return 0;
1642     }
1643
1644   TWO_MORE_BYTES (c1, c2);
1645   if ((c1 == 0xFF) && (c2 == 0xFE))
1646     {
1647       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1648                              | CATEGORY_MASK_UTF_16_AUTO);
1649       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1650                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1651                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1652     }
1653   else if ((c1 == 0xFE) && (c2 == 0xFF))
1654     {
1655       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1656                              | CATEGORY_MASK_UTF_16_AUTO);
1657       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1658                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1659                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1660     }
1661   else if (c2 < 0)
1662     {
1663       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1664       return 0;
1665     }
1666   else
1667     {
1668       /* We check the dispersion of Eth and Oth bytes where E is even and
1669          O is odd.  If both are high, we assume binary data.*/
1670       unsigned char e[256], o[256];
1671       unsigned e_num = 1, o_num = 1;
1672
1673       memset (e, 0, 256);
1674       memset (o, 0, 256);
1675       e[c1] = 1;
1676       o[c2] = 1;
1677
1678       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1679                                 |CATEGORY_MASK_UTF_16_BE
1680                                 | CATEGORY_MASK_UTF_16_LE);
1681
1682       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1683              != CATEGORY_MASK_UTF_16)
1684         {
1685           TWO_MORE_BYTES (c1, c2);
1686           if (c2 < 0)
1687             break;
1688           if (! e[c1])
1689             {
1690               e[c1] = 1;
1691               e_num++;
1692               if (e_num >= 128)
1693                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1694             }
1695           if (! o[c2])
1696             {
1697               o[c2] = 1;
1698               o_num++;
1699               if (o_num >= 128)
1700                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1701             }
1702         }
1703       return 0;
1704     }
1705
1706  no_more_source:
1707   return 1;
1708 }
1709
1710 static void
1711 decode_coding_utf_16 (coding)
1712      struct coding_system *coding;
1713 {
1714   const unsigned char *src = coding->source + coding->consumed;
1715   const unsigned char *src_end = coding->source + coding->src_bytes;
1716   const unsigned char *src_base;
1717   int *charbuf = coding->charbuf + coding->charbuf_used;
1718   /* We may produces at most 3 chars in one loop.  */
1719   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1720   int consumed_chars = 0, consumed_chars_base = 0;
1721   int multibytep = coding->src_multibyte;
1722   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1723   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1724   int surrogate = CODING_UTF_16_SURROGATE (coding);
1725   Lisp_Object attr, charset_list;
1726   int eol_crlf =
1727     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1728   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1729
1730   CODING_GET_INFO (coding, attr, charset_list);
1731
1732   if (bom == utf_with_bom)
1733     {
1734       int c, c1, c2;
1735
1736       src_base = src;
1737       ONE_MORE_BYTE (c1);
1738       ONE_MORE_BYTE (c2);
1739       c = (c1 << 8) | c2;
1740
1741       if (endian == utf_16_big_endian
1742           ? c != 0xFEFF : c != 0xFFFE)
1743         {
1744           /* The first two bytes are not BOM.  Treat them as bytes
1745              for a normal character.  */
1746           src = src_base;
1747           coding->errors++;
1748         }
1749       CODING_UTF_16_BOM (coding) = utf_without_bom;
1750     }
1751   else if (bom == utf_detect_bom)
1752     {
1753       /* We have already tried to detect BOM and failed in
1754          detect_coding.  */
1755       CODING_UTF_16_BOM (coding) = utf_without_bom;
1756     }
1757
1758   while (1)
1759     {
1760       int c, c1, c2;
1761
1762       src_base = src;
1763       consumed_chars_base = consumed_chars;
1764
1765       if (charbuf >= charbuf_end)
1766         {
1767           if (byte_after_cr1 >= 0)
1768             src_base -= 2;
1769           break;
1770         }
1771
1772       if (byte_after_cr1 >= 0)
1773         c1 = byte_after_cr1, byte_after_cr1 = -1;
1774       else
1775         ONE_MORE_BYTE (c1);
1776       if (c1 < 0)
1777         {
1778           *charbuf++ = -c1;
1779           continue;
1780         }
1781       if (byte_after_cr2 >= 0)
1782         c2 = byte_after_cr2, byte_after_cr2 = -1;
1783       else
1784         ONE_MORE_BYTE (c2);
1785       if (c2 < 0)
1786         {
1787           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1788           *charbuf++ = -c2;
1789           continue;
1790         }
1791       c = (endian == utf_16_big_endian
1792            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1793
1794       if (surrogate)
1795         {
1796           if (! UTF_16_LOW_SURROGATE_P (c))
1797             {
1798               if (endian == utf_16_big_endian)
1799                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1800               else
1801                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1802               *charbuf++ = c1;
1803               *charbuf++ = c2;
1804               coding->errors++;
1805               if (UTF_16_HIGH_SURROGATE_P (c))
1806                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1807               else
1808                 *charbuf++ = c;
1809             }
1810           else
1811             {
1812               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1813               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1814               *charbuf++ = 0x10000 + c;
1815             }
1816         }
1817       else
1818         {
1819           if (UTF_16_HIGH_SURROGATE_P (c))
1820             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1821           else
1822             {
1823               if (eol_crlf && c == '\r')
1824                 {
1825                   ONE_MORE_BYTE (byte_after_cr1);
1826                   ONE_MORE_BYTE (byte_after_cr2);
1827                 }
1828               *charbuf++ = c;
1829             }
1830         }
1831     }
1832
1833  no_more_source:
1834   coding->consumed_char += consumed_chars_base;
1835   coding->consumed = src_base - coding->source;
1836   coding->charbuf_used = charbuf - coding->charbuf;
1837 }
1838
1839 static int
1840 encode_coding_utf_16 (coding)
1841      struct coding_system *coding;
1842 {
1843   int multibytep = coding->dst_multibyte;
1844   int *charbuf = coding->charbuf;
1845   int *charbuf_end = charbuf + coding->charbuf_used;
1846   unsigned char *dst = coding->destination + coding->produced;
1847   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1848   int safe_room = 8;
1849   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1850   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1851   int produced_chars = 0;
1852   Lisp_Object attrs, charset_list;
1853   int c;
1854
1855   CODING_GET_INFO (coding, attrs, charset_list);
1856
1857   if (bom != utf_without_bom)
1858     {
1859       ASSURE_DESTINATION (safe_room);
1860       if (big_endian)
1861         EMIT_TWO_BYTES (0xFE, 0xFF);
1862       else
1863         EMIT_TWO_BYTES (0xFF, 0xFE);
1864       CODING_UTF_16_BOM (coding) = utf_without_bom;
1865     }
1866
1867   while (charbuf < charbuf_end)
1868     {
1869       ASSURE_DESTINATION (safe_room);
1870       c = *charbuf++;
1871       if (c > MAX_UNICODE_CHAR)
1872         c = coding->default_char;
1873
1874       if (c < 0x10000)
1875         {
1876           if (big_endian)
1877             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1878           else
1879             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1880         }
1881       else
1882         {
1883           int c1, c2;
1884
1885           c -= 0x10000;
1886           c1 = (c >> 10) + 0xD800;
1887           c2 = (c & 0x3FF) + 0xDC00;
1888           if (big_endian)
1889             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1890           else
1891             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1892         }
1893     }
1894   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1895   coding->produced = dst - coding->destination;
1896   coding->produced_char += produced_chars;
1897   return 0;
1898 }
1899
1900 \f
1901 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1902
1903 /* Emacs' internal format for representation of multiple character
1904    sets is a kind of multi-byte encoding, i.e. characters are
1905    represented by variable-length sequences of one-byte codes.
1906
1907    ASCII characters and control characters (e.g. `tab', `newline') are
1908    represented by one-byte sequences which are their ASCII codes, in
1909    the range 0x00 through 0x7F.
1910
1911    8-bit characters of the range 0x80..0x9F are represented by
1912    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1913    code + 0x20).
1914
1915    8-bit characters of the range 0xA0..0xFF are represented by
1916    one-byte sequences which are their 8-bit code.
1917
1918    The other characters are represented by a sequence of `base
1919    leading-code', optional `extended leading-code', and one or two
1920    `position-code's.  The length of the sequence is determined by the
1921    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1922    whereas extended leading-code and position-code take the range 0xA0
1923    through 0xFF.  See `charset.h' for more details about leading-code
1924    and position-code.
1925
1926    --- CODE RANGE of Emacs' internal format ---
1927    character set        range
1928    -------------        -----
1929    ascii                0x00..0x7F
1930    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1931    eight-bit-graphic    0xA0..0xFF
1932    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1933    ---------------------------------------------
1934
1935    As this is the internal character representation, the format is
1936    usually not used externally (i.e. in a file or in a data sent to a
1937    process).  But, it is possible to have a text externally in this
1938    format (i.e. by encoding by the coding system `emacs-mule').
1939
1940    In that case, a sequence of one-byte codes has a slightly different
1941    form.
1942
1943    At first, all characters in eight-bit-control are represented by
1944    one-byte sequences which are their 8-bit code.
1945
1946    Next, character composition data are represented by the byte
1947    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1948    where,
1949         METHOD is 0xF2 plus one of composition method (enum
1950         composition_method),
1951
1952         BYTES is 0xA0 plus a byte length of this composition data,
1953
1954         CHARS is 0xA0 plus a number of characters composed by this
1955         data,
1956
1957         COMPONENTs are characters of multibyte form or composition
1958         rules encoded by two-byte of ASCII codes.
1959
1960    In addition, for backward compatibility, the following formats are
1961    also recognized as composition data on decoding.
1962
1963    0x80 MSEQ ...
1964    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1965
1966    Here,
1967         MSEQ is a multibyte form but in these special format:
1968           ASCII: 0xA0 ASCII_CODE+0x80,
1969           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1970         RULE is a one byte code of the range 0xA0..0xF0 that
1971         represents a composition rule.
1972   */
1973
1974 char emacs_mule_bytes[256];
1975
1976
1977 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1978    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1979    else return 0.  */
1980
1981 static int
1982 detect_coding_emacs_mule (coding, detect_info)
1983      struct coding_system *coding;
1984      struct coding_detection_info *detect_info;
1985 {
1986   const unsigned char *src = coding->source, *src_base;
1987   const unsigned char *src_end = coding->source + coding->src_bytes;
1988   int multibytep = coding->src_multibyte;
1989   int consumed_chars = 0;
1990   int c;
1991   int found = 0;
1992
1993   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1994   /* A coding system of this category is always ASCII compatible.  */
1995   src += coding->head_ascii;
1996
1997   while (1)
1998     {
1999       src_base = src;
2000       ONE_MORE_BYTE (c);
2001       if (c < 0)
2002         continue;
2003       if (c == 0x80)
2004         {
2005           /* Perhaps the start of composite character.  We simply skip
2006              it because analyzing it is too heavy for detecting.  But,
2007              at least, we check that the composite character
2008              constitutes of more than 4 bytes.  */
2009           const unsigned char *src_base;
2010
2011         repeat:
2012           src_base = src;
2013           do
2014             {
2015               ONE_MORE_BYTE (c);
2016             }
2017           while (c >= 0xA0);
2018
2019           if (src - src_base <= 4)
2020             break;
2021           found = CATEGORY_MASK_EMACS_MULE;
2022           if (c == 0x80)
2023             goto repeat;
2024         }
2025
2026       if (c < 0x80)
2027         {
2028           if (c < 0x20
2029               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2030             break;
2031         }
2032       else
2033         {
2034           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2035
2036           while (more_bytes > 0)
2037             {
2038               ONE_MORE_BYTE (c);
2039               if (c < 0xA0)
2040                 {
2041                   src--;        /* Unread the last byte.  */
2042                   break;
2043                 }
2044               more_bytes--;
2045             }
2046           if (more_bytes != 0)
2047             break;
2048           found = CATEGORY_MASK_EMACS_MULE;
2049         }
2050     }
2051   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2052   return 0;
2053
2054  no_more_source:
2055   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2056     {
2057       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2058       return 0;
2059     }
2060   detect_info->found |= found;
2061   return 1;
2062 }
2063
2064
2065 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2066    character.  If CMP_STATUS indicates that we must expect MSEQ or
2067    RULE described above, decode it and return the negative value of
2068    the decoded character or rule.  If an invalid byte is found, return
2069    -1.  If SRC is too short, return -2.  */
2070
2071 int
2072 emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
2073      struct coding_system *coding;
2074      const unsigned char *src;
2075      int *nbytes, *nchars, *id;
2076      struct composition_status *cmp_status;
2077 {
2078   const unsigned char *src_end = coding->source + coding->src_bytes;
2079   const unsigned char *src_base = src;
2080   int multibytep = coding->src_multibyte;
2081   struct charset *charset;
2082   unsigned code;
2083   int c;
2084   int consumed_chars = 0;
2085   int mseq_found = 0;
2086
2087   ONE_MORE_BYTE (c);
2088   if (c < 0)
2089     {
2090       c = -c;
2091       charset = emacs_mule_charset[0];
2092     }
2093   else
2094     {
2095       if (c >= 0xA0)
2096         {
2097           if (cmp_status->state != COMPOSING_NO
2098               && cmp_status->old_form)
2099             {
2100               if (cmp_status->state == COMPOSING_CHAR)
2101                 {
2102                   if (c == 0xA0)
2103                     {
2104                       ONE_MORE_BYTE (c);
2105                       c -= 0x80;
2106                       if (c < 0)
2107                         goto invalid_code;
2108                     }
2109                   else
2110                     c -= 0x20;
2111                   mseq_found = 1;
2112                 }
2113               else
2114                 {
2115                   *nbytes = src - src_base;
2116                   *nchars = consumed_chars;
2117                   return -c;
2118                 }
2119             }
2120           else
2121             goto invalid_code;
2122         }
2123
2124       switch (emacs_mule_bytes[c])
2125         {
2126         case 2:
2127           if (! (charset = emacs_mule_charset[c]))
2128             goto invalid_code;
2129           ONE_MORE_BYTE (c);
2130           if (c < 0xA0)
2131             goto invalid_code;
2132           code = c & 0x7F;
2133           break;
2134
2135         case 3:
2136           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2137               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2138             {
2139               ONE_MORE_BYTE (c);
2140               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
2141                 goto invalid_code;
2142               ONE_MORE_BYTE (c);
2143               if (c < 0xA0)
2144                 goto invalid_code;
2145               code = c & 0x7F;
2146             }
2147           else
2148             {
2149               if (! (charset = emacs_mule_charset[c]))
2150                 goto invalid_code;
2151               ONE_MORE_BYTE (c);
2152               if (c < 0xA0)
2153                 goto invalid_code;
2154               code = (c & 0x7F) << 8;
2155               ONE_MORE_BYTE (c);
2156               if (c < 0xA0)
2157                 goto invalid_code;
2158               code |= c & 0x7F;
2159             }
2160           break;
2161
2162         case 4:
2163           ONE_MORE_BYTE (c);
2164           if (c < 0 || ! (charset = emacs_mule_charset[c]))
2165             goto invalid_code;
2166           ONE_MORE_BYTE (c);
2167           if (c < 0xA0)
2168             goto invalid_code;
2169           code = (c & 0x7F) << 8;
2170           ONE_MORE_BYTE (c);
2171           if (c < 0xA0)
2172             goto invalid_code;
2173           code |= c & 0x7F;
2174           break;
2175
2176         case 1:
2177           code = c;
2178           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2179                                      ? charset_ascii : charset_eight_bit);
2180           break;
2181
2182         default:
2183           abort ();
2184         }
2185       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, code, c);
2186       if (c < 0)
2187         goto invalid_code;
2188     }
2189   *nbytes = src - src_base;
2190   *nchars = consumed_chars;
2191   if (id)
2192     *id = charset->id;
2193   return (mseq_found ? -c : c);
2194
2195  no_more_source:
2196   return -2;
2197
2198  invalid_code:
2199   return -1;
2200 }
2201
2202
2203 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2204
2205 /* Handle these composition sequences ('|': the end of header elements,
2206    BYTES and CHARS >= 0xA0):
2207
2208    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2209    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2210    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2211
2212    and these old forms:
2213
2214    (4) relative composition: 0x80 | MSEQ ... MSEQ
2215    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2216
2217    When the starter 0x80 and the following header elements are found,
2218    this annotation header is produced.
2219
2220         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2221
2222    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2223    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2224
2225    Then, upon reading the following elements, these codes are produced
2226    until the composition end is found:
2227
2228    (1) CHAR ... CHAR
2229    (2) ALT ... ALT CHAR ... CHAR
2230    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2231    (4) CHAR ... CHAR
2232    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2233
2234    When the composition end is found, LENGTH and NCHARS in the
2235    annotation header are updated as below:
2236
2237    (1) LENGTH: unchanged, NCHARS: unchanged
2238    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2239    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2240    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2241    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2242
2243    If an error is found while composing, the annotation header is
2244    changed to the original composition header (plus filler -1s) as
2245    below:
2246
2247    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2248    (5)          [ 0x80 0xFF -1 -1 -1 ]
2249
2250    and the sequence [ -2 DECODED-RULE ] is changed to the original
2251    byte sequence as below:
2252         o the original byte sequence is B: [ B -1 ]
2253         o the original byte sequence is B1 B2: [ B1 B2 ]
2254
2255    Most of the routines are implemented by macros because many
2256    variables and labels in the caller decode_coding_emacs_mule must be
2257    accessible, and they are usually called just once (thus doesn't
2258    increase the size of compiled object).  */
2259
/* Decode the composition rule held in C, a component of a composition
   sequence of Emacs 20 style, and store the result in RULE.

   C is first reduced by 0xA0 (and stays reduced, even on failure).
   The reduced value must lie in 0..80, otherwise control jumps to
   `invalid_code'.  It is split into a quotient and a remainder by 9;
   a quotient or remainder of 4 is replaced by 10 before the two are
   passed to COMPOSITION_ENCODE_RULE.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)          \
  do {                                                          \
    int gref, nref;                                             \
                                                                \
    (c) -= 0xA0;                                                \
    if (! (0 <= (c) && (c) <= 80))                              \
      goto invalid_code;                                        \
    gref = ((c) / 9 == 4) ? 10 : (c) / 9;                       \
    nref = ((c) % 9 == 4) ? 10 : (c) % 9;                       \
    (rule) = COMPOSITION_ENCODE_RULE (gref, nref);              \
  } while (0)
2276
2277
/* Decode the composition rule given by C and by the byte that follows
   at SRC, a component of a composition sequence of Emacs 21 style,
   and store the result in RULE.

   Each of the two bytes must lie in 0x20..0x70; with 0x20 subtracted
   they become the two arguments of COMPOSITION_ENCODE_RULE.  Control
   jumps to `invalid_code' as soon as a byte is out of range; the
   second byte is fetched with ONE_MORE_BYTE only after the first one
   has been accepted.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)          \
  do {                                                          \
    int gref, nref;                                             \
                                                                \
    if ((c) < 0x20 || (c) > 0x70)                               \
      goto invalid_code;                                        \
    gref = (c) - 0x20;                                          \
    ONE_MORE_BYTE (c);                                          \
    if ((c) < 0x20 || (c) > 0x70)                               \
      goto invalid_code;                                        \
    nref = (c) - 0x20;                                          \
    (rule) = COMPOSITION_ENCODE_RULE (gref, nref);              \
  } while (0)
2295
2296
/* Start of Emacs 21 style format.  On entry C holds the method byte
   (METHOD + 0xF2); the next two bytes at SRC are (BYTES + 0xA0) and
   (CHARS + 0xA0), where BYTES is the byte length of this composition
   information and CHARS is the number of characters composed by this
   composition.

   Control jumps to `invalid_code' if the BYTES byte is negative, if
   BYTES is below 3, if the method is COMPOSITION_RELATIVE and BYTES
   is not 4, or if CHARS is not in 1..MAX_COMPOSITION_COMPONENTS-1.
   Otherwise *CMP_STATUS is set up for the new composition and the
   annotation header is appended to CHARBUF with
   ADD_COMPOSITION_DATA.  */

#define DECODE_EMACS_MULE_21_COMPOSITION()                              \
  do {                                                                  \
    enum composition_method method = c - 0xF2;                          \
    int *charbuf_base = charbuf;                                        \
    int nbytes, nchars;                                                 \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    nbytes = c - 0xA0;                                                  \
    if (nbytes < 3)                                                     \
      goto invalid_code;                                                \
    if (nbytes != 4 && method == COMPOSITION_RELATIVE)                  \
      goto invalid_code;                                                \
    ONE_MORE_BYTE (c);                                                  \
    nchars = c - 0xA0;                                                  \
    if (! (nchars > 0 && nchars < MAX_COMPOSITION_COMPONENTS))          \
      goto invalid_code;                                                \
    /* A relative composition has only characters to read; the other    \
       methods begin with their component characters.  */               \
    cmp_status->state = (method == COMPOSITION_RELATIVE                 \
                         ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);  \
    cmp_status->method = method;                                        \
    cmp_status->old_form = 0;                                           \
    cmp_status->nchars = nchars;                                        \
    cmp_status->ncomps = nbytes - 4;                                    \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
    ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
  } while (0)
2329
2330
/* Common start of an Emacs 20 style (old form) composition that uses
   the composition method OLD_METHOD: mark *CMP_STATUS as composing
   characters in the old form with no characters or components seen
   yet, and append an annotation header whose NCHARS and NBYTES are
   both 0 to CHARBUF.  */

#define DECODE_EMACS_MULE_20_COMPOSITION_BEGIN(old_method)      \
  do {                                                          \
    cmp_status->ncomps = 0;                                     \
    cmp_status->nchars = 0;                                     \
    cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
    cmp_status->state = COMPOSING_CHAR;                         \
    cmp_status->method = (old_method);                          \
    cmp_status->old_form = 1;                                   \
    ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
  } while (0)


/* Start of Emacs 20 style format for relative composition.  */

#define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
  DECODE_EMACS_MULE_20_COMPOSITION_BEGIN (COMPOSITION_RELATIVE)


/* Start of Emacs 20 style format for rule-base composition.  */

#define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
  DECODE_EMACS_MULE_20_COMPOSITION_BEGIN (COMPOSITION_WITH_RULE)
2355
2356
/* Read the byte that follows a composition starter into C and begin
   the kind of composition it announces:

     0xF2 + a composition method   Emacs 21 style header
     0xFF                          Emacs 20 style rule-base composition
     0xA0..0xBF                    Emacs 20 style relative composition;
                                   SRC is moved back so that the byte
                                   is read again as a component

   A negative value or any other byte makes control jump to
   `invalid_code'.  */

#define DECODE_EMACS_MULE_COMPOSITION_START()                           \
  do {                                                                  \
    const unsigned char *current_src = src;                             \
                                                                        \
    ONE_MORE_BYTE (c);                                                  \
    if (c < 0)                                                          \
      goto invalid_code;                                                \
    if (c - 0xF2 >= COMPOSITION_RELATIVE                                \
        && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)                  \
      {                                                                 \
        DECODE_EMACS_MULE_21_COMPOSITION ();                            \
      }                                                                 \
    else if (c == 0xFF)                                                 \
      {                                                                 \
        DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();                   \
      }                                                                 \
    else if (c >= 0xA0 && c < 0xC0)                                     \
      {                                                                 \
        DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();                   \
        /* Re-read C as a composition component.  */                    \
        src = current_src;                                              \
      }                                                                 \
    else                                                                \
      goto invalid_code;                                                \
  } while (0)
2380
/* Close the composition whose annotation occupies the last
   cmp_status->length elements before CHARBUF.

   Uses the caller's locals `cmp_status' and `charbuf'.  For an
   old-form composition the third annotation element receives the
   number of characters seen.  Otherwise, for any method above
   COMPOSITION_RELATIVE, the first element is set to the third
   element minus the recorded length.  In every case the state
   returns to COMPOSING_NO.  */
#define EMACS_MULE_COMPOSITION_END()                                    \
  do {                                                                  \
    const int first = - cmp_status->length;                             \
                                                                        \
    if (! cmp_status->old_form)                                         \
      {                                                                 \
        if (cmp_status->method > COMPOSITION_RELATIVE)                  \
          charbuf[first] = charbuf[first + 2] + first;                  \
      }                                                                 \
    else                                                                \
      charbuf[first + 2] = cmp_status->nchars;                          \
    cmp_status->state = COMPOSING_NO;                                   \
  } while (0)
2391
2392
2393 static int
2394 emacs_mule_finish_composition (charbuf, cmp_status)
2395      int *charbuf;
2396      struct composition_status *cmp_status;
2397 {
2398   int idx = - cmp_status->length;
2399   int new_chars;
2400
2401   if (cmp_status->old_form && cmp_status->nchars > 0)
2402     {
2403       charbuf[idx + 2] = cmp_status->nchars;
2404       new_chars = 0;
2405       if (cmp_status->method == COMPOSITION_WITH_RULE
2406           && cmp_status->state == COMPOSING_CHAR)
2407         {
2408           /* The last rule was invalid.  */
2409           int rule = charbuf[-1] + 0xA0;
2410
2411           charbuf[-2] = BYTE8_TO_CHAR (rule);
2412           charbuf[-1] = -1;
2413           new_chars = 1;
2414         }
2415     }
2416   else
2417     {
2418       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2419
2420       if (cmp_status->method == COMPOSITION_WITH_RULE)
2421         {
2422           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2423           charbuf[idx++] = -3;
2424           charbuf[idx++] = 0;
2425           new_chars = 1;
2426         }
2427       else
2428         {
2429           int nchars = charbuf[idx + 1] + 0xA0;
2430           int nbytes = charbuf[idx + 2] + 0xA0;
2431
2432           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2433           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2434           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2435           charbuf[idx++] = -1;
2436           new_chars = 4;
2437         }
2438     }
2439   cmp_status->state = COMPOSING_NO;
2440   return new_chars;
2441 }
2442
/* If a composition is in progress (state other than COMPOSING_NO),
   finish it with emacs_mule_finish_composition and add the value it
   returns to the caller's `char_offset'.  Uses the caller's locals
   `cmp_status', `charbuf' and `char_offset'.  */
#define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                           \
  do {                                                                  \
    if (cmp_status->state != COMPOSING_NO)                              \
      {                                                                 \
        int flushed                                                     \
          = emacs_mule_finish_composition (charbuf, cmp_status);        \
                                                                        \
        char_offset += flushed;                                         \
      }                                                                 \
  } while (0)
2448
2449
/* Decode the emacs-mule encoded source of CODING into coding->charbuf.

   Reading starts at coding->source + coding->consumed and ends at
   coding->source + coding->src_bytes.  The loop also stops when the
   character buffer is nearly full or when emacs_mule_char returns
   -2.  Character codes are appended to coding->charbuf starting at
   index coding->charbuf_used, interleaved with charset annotations
   (emitted whenever the charset id changes) and composition
   annotations.

   Composition state lives in coding->spec.emacs_mule.cmp_status.  If
   a composition is still open when decoding stops, it is finished
   when CODING_MODE_LAST_BLOCK is set; otherwise its elements are
   saved in cmp_status->carryover and reloaded by the next call.

   On return coding->consumed, coding->consumed_char and
   coding->charbuf_used are updated.  */
static void
decode_coding_emacs_mule (coding)
     struct coding_system *coding;
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  /* We may produce two annotations (charset and composition) in one
     loop and one more charset annotation at the end.  */
  int *charbuf_end
    = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
  int consumed_chars = 0, consumed_chars_base;
  /* Not referenced by name below; presumably read by ONE_MORE_BYTE
     (defined elsewhere) -- confirm before removing.  */
  int multibytep = coding->src_multibyte;
  Lisp_Object attrs, charset_list;
  /* Running character position, and the position and charset id at
     which the current run of same-charset characters began.  */
  int char_offset = coding->produced_char;
  int last_offset = char_offset;
  int last_id = charset_ascii;
  int eol_crlf =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* Byte read ahead after a CR when EOL_CRLF is set; -1 if none is
     pending.  */
  int byte_after_cr = -1;
  struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;

  CODING_GET_INFO (coding, attrs, charset_list);

  if (cmp_status->state != COMPOSING_NO)
    {
      /* A composition was left open by the previous call: put its
         saved elements back at the head of the output.  */
      int i;

      for (i = 0; i < cmp_status->length; i++)
        *charbuf++ = cmp_status->carryover[i];
      coding->annotated = 1;
    }

  while (1)
    {
      int c, id;

      /* Remember where this iteration starts so that the exit paths
         and `invalid_code' can rewind to it.  */
      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* Out of room.  A pending look-ahead byte has already been
             read from the source, so step SRC_BASE back over it.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      if (byte_after_cr >= 0)
        c = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c);

      if (c < 0 || c == 0x80)
        {
          /* Either a negative value from ONE_MORE_BYTE, stored as -C,
             or 0x80, which introduces a composition.  Both end any
             composition in progress.  */
          EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
          if (c < 0)
            {
              *charbuf++ = -c;
              char_offset++;
            }
          else
            DECODE_EMACS_MULE_COMPOSITION_START ();
          continue;
        }

      if (c < 0x80)
        {
          /* With CRLF decoding, fetch the byte after a CR now; it is
             consumed at the top of the next iteration.  */
          if (eol_crlf && c == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          id = charset_ascii;
          if (cmp_status->state != COMPOSING_NO)
            {
              if (cmp_status->old_form)
                EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
              else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
                cmp_status->ncomps--;
            }
        }
      else
        {
          int nchars, nbytes;
          /* emacs_mule_char can load a charset map from a file, which
             allocates a large structure and might cause buffer text
             to be relocated as result.  Thus, we need to remember the
             original pointer to buffer text, and fixup all related
             pointers after the call.  */
          const unsigned char *orig = coding->source;
          EMACS_INT offset;

          c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
                               cmp_status);
          offset = coding->source - orig;
          if (offset)
            {
              src += offset;
              src_base += offset;
              src_end += offset;
            }
          if (c < 0)
            {
              /* -1 is handled as invalid input; -2 stops decoding.
                 Other negative values fall through (see the comment
                 below).  */
              if (c == -1)
                goto invalid_code;
              if (c == -2)
                break;
            }
          src = src_base + nbytes;
          consumed_chars = consumed_chars_base + nchars;
          if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
            cmp_status->ncomps -= nchars;
        }

      /* Now if C >= 0, we found a normally encoded character, if C <
         0, we found an old-style composition component character or
         rule.  */

      if (cmp_status->state == COMPOSING_NO)
        {
          /* Plain character.  When the charset changes, close the
             previous non-ASCII run with a charset annotation.  */
          if (last_id != id)
            {
              if (last_id != charset_ascii)
                ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
                                  last_id);
              last_id = id;
              last_offset = char_offset;
            }
          *charbuf++ = c;
          char_offset++;
        }
      else if (cmp_status->state == COMPOSING_CHAR)
        {
          if (cmp_status->old_form)
            {
              if (c >= 0)
                {
                  /* A normally encoded character ends an old-form
                     composition.  */
                  EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
                  *charbuf++ = c;
                  char_offset++;
                }
              else
                {
                  *charbuf++ = -c;
                  cmp_status->nchars++;
                  cmp_status->length++;
                  if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
                    EMACS_MULE_COMPOSITION_END ();
                  else if (cmp_status->method == COMPOSITION_WITH_RULE)
                    cmp_status->state = COMPOSING_RULE;
                }
            }
          else
            {
              /* New-form composition: NCHARS counts down the
                 characters still expected.  */
              *charbuf++ = c;
              cmp_status->length++;
              cmp_status->nchars--;
              if (cmp_status->nchars == 0)
                EMACS_MULE_COMPOSITION_END ();
            }
        }
      else if (cmp_status->state == COMPOSING_RULE)
        {
          int rule;

          if (c >= 0)
            {
              EMACS_MULE_COMPOSITION_END ();
              *charbuf++ = c;
              char_offset++;
            }
          else
            {
              c = -c;
              DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
              if (rule < 0)
                goto invalid_code;
              /* A rule is stored as the pair (-2, RULE).  */
              *charbuf++ = -2;
              *charbuf++ = rule;
              cmp_status->length += 2;
              cmp_status->state = COMPOSING_CHAR;
            }
        }
      else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
        {
          *charbuf++ = c;
          cmp_status->length++;
          if (cmp_status->ncomps == 0)
            cmp_status->state = COMPOSING_CHAR;
          else if (cmp_status->ncomps > 0)
            {
              if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
                cmp_status->state = COMPOSING_COMPONENT_RULE;
            }
          else
            /* NCOMPS went negative: give up on the composition.  */
            EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
        }
      else                      /* COMPOSING_COMPONENT_RULE */
        {
          int rule;

          DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
          if (rule < 0)
            goto invalid_code;
          *charbuf++ = -2;
          *charbuf++ = rule;
          cmp_status->length += 2;
          cmp_status->ncomps--;
          if (cmp_status->ncomps > 0)
            cmp_status->state = COMPOSING_COMPONENT_CHAR;
          else
            EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
        }
      continue;

      /* NOTE(review): no `goto retry' appears in this function's own
         text; the label may be the target of a macro defined
         elsewhere -- confirm before removing it.  */
    retry:
      src = src_base;
      consumed_chars = consumed_chars_base;
      continue;

    invalid_code:
      /* Rewind to the start of this iteration and pass exactly one
         source byte through: as itself if ASCII, else as a byte8
         character.  Count the error.  */
      EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
      char_offset++;
      coding->errors++;
    }

  /* Not the target of any goto written in this function; presumably
     reached from ONE_MORE_BYTE when the source is exhausted --
     confirm against that macro.  */
 no_more_source:
  if (cmp_status->state != COMPOSING_NO)
    {
      if (coding->mode & CODING_MODE_LAST_BLOCK)
        EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
      else
        {
          /* More input may follow: take the unfinished composition
             back out of the output and save it for the next call.  */
          int i;

          charbuf -= cmp_status->length;
          for (i = 0; i < cmp_status->length; i++)
            cmp_status->carryover[i] = charbuf[i];
        }
    }
  if (last_id != charset_ascii)
    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
2698
2699
/* Store in CODES, an array of at least two bytes, the leading code(s)
   selected by ID:

     ID < 0xA0          CODES[0] = ID,   CODES[1] = 0
     0xA0 <= ID < 0xE0  CODES[0] = 0x9A, CODES[1] = ID
     0xE0 <= ID < 0xF0  CODES[0] = 0x9B, CODES[1] = ID
     0xF0 <= ID < 0xF5  CODES[0] = 0x9C, CODES[1] = ID
     otherwise          CODES[0] = 0x9D, CODES[1] = ID

   A zero CODES[1] therefore means that only one leading byte is used.

   ID is evaluated exactly once.  The expansion has no trailing
   semicolon, so the macro behaves as a single statement and can be
   used as the body of an `if' that has an `else'.  */
#define EMACS_MULE_LEADING_CODES(id, codes)                             \
  do {                                                                  \
    const int emacs_mule_leading_id_ = (id);                            \
                                                                        \
    if (emacs_mule_leading_id_ < 0xA0)                                  \
      (codes)[0] = emacs_mule_leading_id_, (codes)[1] = 0;              \
    else if (emacs_mule_leading_id_ < 0xE0)                             \
      (codes)[0] = 0x9A, (codes)[1] = emacs_mule_leading_id_;           \
    else if (emacs_mule_leading_id_ < 0xF0)                             \
      (codes)[0] = 0x9B, (codes)[1] = emacs_mule_leading_id_;           \
    else if (emacs_mule_leading_id_ < 0xF5)                             \
      (codes)[0] = 0x9C, (codes)[1] = emacs_mule_leading_id_;           \
    else                                                                \
      (codes)[0] = 0x9D, (codes)[1] = emacs_mule_leading_id_;           \
  } while (0)
2713
2714
2715 static int
2716 encode_coding_emacs_mule (coding)
2717      struct coding_system *coding;
2718 {
2719   int multibytep = coding->dst_multibyte;
2720   int *charbuf = coding->charbuf;
2721   int *charbuf_end = charbuf + coding->charbuf_used;
2722   unsigned char *dst = coding->destination + coding->produced;
2723   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2724   int safe_room = 8;
2725   int produced_chars = 0;
2726   Lisp_Object attrs, charset_list;
2727   int c;
2728   int preferred_charset_id = -1;
2729
2730   CODING_GET_INFO (coding, attrs, charset_list);
2731   if (! EQ (charset_list, Vemacs_mule_charset_list))
2732     {
2733       CODING_ATTR_CHARSET_LIST (attrs)
2734         = charset_list = Vemacs_mule_charset_list;
2735     }
2736
2737   while (charbuf < charbuf_end)
2738     {
2739       ASSURE_DESTINATION (safe_room);
2740       c = *charbuf++;
2741
2742       if (c < 0)
2743         {
2744           /* Handle an annotation.  */
2745           switch (*charbuf)
2746             {
2747             case CODING_ANNOTATE_COMPOSITION_MASK:
2748               /* Not yet implemented.  */
2749               break;
2750             case CODING_ANNOTATE_CHARSET_MASK:
2751               preferred_charset_id = charbuf[3];
2752               if (preferred_charset_id >= 0
2753                   && NILP (Fmemq (make_number (preferred_charset_id),
2754                                   charset_list)))
2755                 preferred_charset_id = -1;
2756               break;
2757             default:
2758               abort ();
2759             }
2760           charbuf += -c - 1;
2761           continue;
2762         }
2763
2764       if (ASCII_CHAR_P (c))
2765         EMIT_ONE_ASCII_BYTE (c);
2766       else if (CHAR_BYTE8_P (c))
2767         {
2768           c = CHAR_TO_BYTE8 (c);
2769           EMIT_ONE_BYTE (c);
2770         }
2771       else
2772         {
2773           struct charset *charset;
2774           unsigned code;
2775           int dimension;
2776           int emacs_mule_id;
2777           unsigned char leading_codes[2];
2778
2779           if (preferred_charset_id >= 0)
2780             {
2781               charset = CHARSET_FROM_ID (preferred_charset_id);
2782               if (CHAR_CHARSET_P (c, charset))
2783                 code = ENCODE_CHAR (charset, c);
2784               else
2785                 charset = char_charset (c, charset_list, &code);
2786             }
2787           else
2788             charset = char_charset (c, charset_list, &code);
2789           if (! charset)
2790             {
2791               c = coding->default_char;
2792               if (ASCII_CHAR_P (c))
2793                 {
2794                   EMIT_ONE_ASCII_BYTE (c);
2795                   continue;
2796                 }
2797               charset = char_charset (c, charset_list, &code);
2798             }
2799           dimension = CHARSET_DIMENSION (charset);
2800           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2801           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2802           EMIT_ONE_BYTE (leading_codes[0]);
2803           if (leading_codes[1])
2804             EMIT_ONE_BYTE (leading_codes[1]);
2805           if (dimension == 1)
2806             EMIT_ONE_BYTE (code | 0x80);
2807           else
2808             {
2809               code |= 0x8080;
2810               EMIT_ONE_BYTE (code >> 8);
2811               EMIT_ONE_BYTE (code & 0xFF);
2812             }
2813         }
2814     }
2815   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2816   coding->produced_char += produced_chars;
2817   coding->produced = dst - coding->destination;
2818   return 0;
2819 }
2820
2821 \f
2822 /*** 7. ISO2022 handlers ***/
2823
2824 /* The following note describes the coding system ISO2022 briefly.
2825    Since the intention of this note is to help understand the
2826    functions in this file, some parts are NOT ACCURATE or are OVERLY
2827    SIMPLIFIED.  For thorough understanding, please refer to the
2828    original document of ISO2022.  This is equivalent to the standard
2829    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2830
2831    ISO2022 provides many mechanisms to encode several character sets
2832    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2833    is encoded using bytes less than 128.  This may make the encoded
2834    text a little bit longer, but the text passes more easily through
2835    several types of gateway, some of which strip off the MSB (Most
2836    Significant Bit).
2837
2838    There are two kinds of character sets: control character sets and
2839    graphic character sets.  The former contain control characters such
2840    as `newline' and `escape' to provide control functions (control
2841    functions are also provided by escape sequences).  The latter
2842    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2843    two control character sets and many graphic character sets.
2844
2845    Graphic character sets are classified into one of the following
2846    four classes, according to the number of bytes (DIMENSION) and
2847    number of characters in one dimension (CHARS) of the set:
2848    - DIMENSION1_CHARS94
2849    - DIMENSION1_CHARS96
2850    - DIMENSION2_CHARS94
2851    - DIMENSION2_CHARS96
2852
2853    In addition, each character set is assigned an identification tag,
2854    unique for each set, called the "final character" (denoted as <F>
2855    hereafter).  The <F> of each character set is decided by ECMA(*)
2856    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2857    (0x30..0x3F are for private use only).
2858
2859    Note (*): ECMA = European Computer Manufacturers Association
2860
2861    Here are examples of graphic character sets [NAME(<F>)]:
2862         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2863         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2864         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2865         o DIMENSION2_CHARS96 -- none for the moment
2866
2867    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2868         C0 [0x00..0x1F] -- control character plane 0
2869         GL [0x20..0x7F] -- graphic character plane 0
2870         C1 [0x80..0x9F] -- control character plane 1
2871         GR [0xA0..0xFF] -- graphic character plane 1
2872
2873    A control character set is directly designated and invoked to C0 or
2874    C1 by an escape sequence.  The most common case is that:
2875    - ISO646's  control character set is designated/invoked to C0, and
2876    - ISO6429's control character set is designated/invoked to C1,
2877    and usually these designations/invocations are omitted in encoded
2878    text.  In a 7-bit environment, only C0 can be used, and a control
2879    character for C1 is encoded by an appropriate escape sequence to
2880    fit into the environment.  All control characters for C1 are
2881    defined to have corresponding escape sequences.
2882
2883    A graphic character set is at first designated to one of four
2884    graphic registers (G0 through G3), then these graphic registers are
2885    invoked to GL or GR.  These designations and invocations can be
2886    done independently.  The most common case is that G0 is invoked to
2887    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2888    these invocations and designations are omitted in encoded text.
2889    In a 7-bit environment, only GL can be used.
2890
2891    When a graphic character set of CHARS94 is invoked to GL, codes
2892    0x20 and 0x7F of the GL area work as control characters SPACE and
2893    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2894    be used.
2895
2896    There are two ways of invocation: locking-shift and single-shift.
2897    With locking-shift, the invocation lasts until the next different
2898    invocation, whereas with single-shift, the invocation affects the
2899    following character only and doesn't affect the locking-shift
2900    state.  Invocations are done by the following control characters or
2901    escape sequences:
2902
2903    ----------------------------------------------------------------------
2904    abbrev  function                  cntrl escape seq   description
2905    ----------------------------------------------------------------------
2906    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2907    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2908    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2909    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2910    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2911    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2912    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2913    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2914    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2915    ----------------------------------------------------------------------
2916    (*) These are not used by any known coding system.
2917
2918    Control characters for these functions are defined by macros
2919    ISO_CODE_XXX in `coding.h'.
2920
2921    Designations are done by the following escape sequences:
2922    ----------------------------------------------------------------------
2923    escape sequence      description
2924    ----------------------------------------------------------------------
2925    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2926    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2927    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2928    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2929    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2930    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2931    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2932    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2933    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2934    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2935    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2936    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2937    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2938    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2939    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2940    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2941    ----------------------------------------------------------------------
2942
2943    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2944    of dimension 1, chars 94, and final character <F>, etc...
2945
2946    Note (*): Although these designations are not allowed in ISO2022,
2947    Emacs accepts them on decoding, and produces them on encoding
2948    CHARS96 character sets in a coding system which is characterized as
2949    7-bit environment, non-locking-shift, and non-single-shift.
2950
2951    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2952    '(' must be omitted.  We refer to this as "short-form" hereafter.
2953
2954    Now you may notice that there are a lot of ways of encoding the
2955    same multilingual text in ISO2022.  Actually, there exist many
   coding systems such as Compound Text (used in X11's inter client
   communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2958    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2959    localized platforms), and all of these are variants of ISO2022.
2960
2961    In addition to the above, Emacs handles two more kinds of escape
2962    sequences: ISO6429's direction specification and Emacs' private
2963    sequence for specifying character composition.
2964
2965    ISO6429's direction specification takes the following form:
2966         o CSI ']'      -- end of the current direction
2967         o CSI '0' ']'  -- end of the current direction
2968         o CSI '1' ']'  -- start of left-to-right text
2969         o CSI '2' ']'  -- start of right-to-left text
2970    The control character CSI (0x9B: control sequence introducer) is
2971    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2972
2973    Character composition specification takes the following form:
2974         o ESC '0' -- start relative composition
2975         o ESC '1' -- end composition
2976         o ESC '2' -- start rule-base composition (*)
2977         o ESC '3' -- start relative composition with alternate chars  (**)
2978         o ESC '4' -- start rule-base composition with alternate chars  (**)
2979   Since these are not standard escape sequences of any ISO standard,
2980   the use of them with these meanings is restricted to Emacs only.
2981
2982   (*) This form is used only in Emacs 20.7 and older versions,
2983   but newer versions can safely decode it.
2984   (**) This form is used only in Emacs 21.1 and newer versions,
2985   and older versions can't decode it.
2986
2987   Here's a list of example usages of these composition escape
2988   sequences (categorized by `enum composition_method').
2989
2990   COMPOSITION_RELATIVE:
2991         ESC 0 CHAR [ CHAR ] ESC 1
2992   COMPOSITION_WITH_RULE:
2993         ESC 2 CHAR [ RULE CHAR ] ESC 1
2994   COMPOSITION_WITH_ALTCHARS:
2995         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2996   COMPOSITION_WITH_RULE_ALTCHARS:
2997         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2998
/* Table with one `enum iso_code_class_type' entry per possible byte
   value (256 entries).  Presumably gives the ISO 2022 class of each
   byte; it is filled in outside this part of the file -- confirm
   where before relying on its contents.  */
enum iso_code_class_type iso_code_class[256];
3000
/* Return nonzero if the charset whose id is ID is usable with CODING:
   ID must lie within 0 .. CODING->max_charset_id and its entry in
   CODING->safe_charsets must not be 255, the value that marks an
   unsupported charset.

   A negative ID is rejected explicitly.  Callers may pass ids taken
   from lookup tables that use negative values for "no charset", and
   indexing safe_charsets with such a value would read before the
   start of the array.  ID is evaluated more than once.  */
#define SAFE_CHARSET_P(coding, id)      \
  ((id) >= 0                            \
   && (id) <= (coding)->max_charset_id  \
   && (coding)->safe_charsets[id] != 255)
3004
3005
/* Nonzero if CODING_ISO_INITIAL for register 1 of the coding system
   stored in coding_categories[CATEGORY] is not negative.  */
#define SHIFT_OUT_OK(category)  \
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
3008
3009 static void
3010 setup_iso_safe_charsets (attrs)
3011      Lisp_Object attrs;
3012 {
3013   Lisp_Object charset_list, safe_charsets;
3014   Lisp_Object request;
3015   Lisp_Object reg_usage;
3016   Lisp_Object tail;
3017   int reg94, reg96;
3018   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
3019   int max_charset_id;
3020
3021   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3022   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
3023       && ! EQ (charset_list, Viso_2022_charset_list))
3024     {
3025       CODING_ATTR_CHARSET_LIST (attrs)
3026         = charset_list = Viso_2022_charset_list;
3027       ASET (attrs, coding_attr_safe_charsets, Qnil);
3028     }
3029
3030   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3031     return;
3032
3033   max_charset_id = 0;
3034   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3035     {
3036       int id = XINT (XCAR (tail));
3037       if (max_charset_id < id)
3038         max_charset_id = id;
3039     }
3040
3041   safe_charsets = make_uninit_string (max_charset_id + 1);
3042   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
3043   request = AREF (attrs, coding_attr_iso_request);
3044   reg_usage = AREF (attrs, coding_attr_iso_usage);
3045   reg94 = XINT (XCAR (reg_usage));
3046   reg96 = XINT (XCDR (reg_usage));
3047
3048   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3049     {
3050       Lisp_Object id;
3051       Lisp_Object reg;
3052       struct charset *charset;
3053
3054       id = XCAR (tail);
3055       charset = CHARSET_FROM_ID (XINT (id));
3056       reg = Fcdr (Fassq (id, request));
3057       if (! NILP (reg))
3058         SSET (safe_charsets, XINT (id), XINT (reg));
3059       else if (charset->iso_chars_96)
3060         {
3061           if (reg96 < 4)
3062             SSET (safe_charsets, XINT (id), reg96);
3063         }
3064       else
3065         {
3066           if (reg94 < 4)
3067             SSET (safe_charsets, XINT (id), reg94);
3068         }
3069     }
3070   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3071 }
3072
3073
3074 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in one of ISO-2022 based coding systems.
3076    If it is, return 1, else return 0.  */
3077
3078 static int
3079 detect_coding_iso_2022 (coding, detect_info)
3080      struct coding_system *coding;
3081      struct coding_detection_info *detect_info;
3082 {
3083   const unsigned char *src = coding->source, *src_base = src;
3084   const unsigned char *src_end = coding->source + coding->src_bytes;
3085   int multibytep = coding->src_multibyte;
3086   int single_shifting = 0;
3087   int id;
3088   int c, c1;
3089   int consumed_chars = 0;
3090   int i;
3091   int rejected = 0;
3092   int found = 0;
3093   int composition_count = -1;
3094
3095   detect_info->checked |= CATEGORY_MASK_ISO;
3096
3097   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3098     {
3099       struct coding_system *this = &(coding_categories[i]);
3100       Lisp_Object attrs, val;
3101
3102       if (this->id < 0)
3103         continue;
3104       attrs = CODING_ID_ATTRS (this->id);
3105       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3106           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3107         setup_iso_safe_charsets (attrs);
3108       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3109       this->max_charset_id = SCHARS (val) - 1;
3110       this->safe_charsets = SDATA (val);
3111     }
3112
3113   /* A coding system of this category is always ASCII compatible.  */
3114   src += coding->head_ascii;
3115
3116   while (rejected != CATEGORY_MASK_ISO)
3117     {
3118       src_base = src;
3119       ONE_MORE_BYTE (c);
3120       switch (c)
3121         {
3122         case ISO_CODE_ESC:
3123           if (inhibit_iso_escape_detection)
3124             break;
3125           single_shifting = 0;
3126           ONE_MORE_BYTE (c);
3127           if (c >= '(' && c <= '/')
3128             {
3129               /* Designation sequence for a charset of dimension 1.  */
3130               ONE_MORE_BYTE (c1);
3131               if (c1 < ' ' || c1 >= 0x80
3132                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3133                 /* Invalid designation sequence.  Just ignore.  */
3134                 break;
3135             }
3136           else if (c == '$')
3137             {
3138               /* Designation sequence for a charset of dimension 2.  */
3139               ONE_MORE_BYTE (c);
3140               if (c >= '@' && c <= 'B')
3141                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3142                 id = iso_charset_table[1][0][c];
3143               else if (c >= '(' && c <= '/')
3144                 {
3145                   ONE_MORE_BYTE (c1);
3146                   if (c1 < ' ' || c1 >= 0x80
3147                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3148                     /* Invalid designation sequence.  Just ignore.  */
3149                     break;
3150                 }
3151               else
3152                 /* Invalid designation sequence.  Just ignore it.  */
3153                 break;
3154             }
3155           else if (c == 'N' || c == 'O')
3156             {
3157               /* ESC <Fe> for SS2 or SS3.  */
3158               single_shifting = 1;
3159               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3160               break;
3161             }
3162           else if (c == '1')
3163             {
3164               /* End of composition.  */
3165               if (composition_count < 0
3166                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3167                 /* Invalid */
3168                 break;
3169               composition_count = -1;
3170               found |= CATEGORY_MASK_ISO;
3171             }
3172           else if (c >= '0' && c <= '4')
3173             {
3174               /* ESC <Fp> for start/end composition.  */
3175               composition_count = 0;
3176               break;
3177             }
3178           else
3179             {
3180               /* Invalid escape sequence.  Just ignore it.  */
3181               break;
3182             }
3183
3184           /* We found a valid designation sequence for CHARSET.  */
3185           rejected |= CATEGORY_MASK_ISO_8BIT;
3186           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3187                               id))
3188             found |= CATEGORY_MASK_ISO_7;
3189           else
3190             rejected |= CATEGORY_MASK_ISO_7;
3191           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3192                               id))
3193             found |= CATEGORY_MASK_ISO_7_TIGHT;
3194           else
3195             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3196           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3197                               id))
3198             found |= CATEGORY_MASK_ISO_7_ELSE;
3199           else
3200             rejected |= CATEGORY_MASK_ISO_7_ELSE;
3201           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3202                               id))
3203             found |= CATEGORY_MASK_ISO_8_ELSE;
3204           else
3205             rejected |= CATEGORY_MASK_ISO_8_ELSE;
3206           break;
3207
3208         case ISO_CODE_SO:
3209         case ISO_CODE_SI:
3210           /* Locking shift out/in.  */
3211           if (inhibit_iso_escape_detection)
3212             break;
3213           single_shifting = 0;
3214           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3215           break;
3216
3217         case ISO_CODE_CSI:
3218           /* Control sequence introducer.  */
3219           single_shifting = 0;
3220           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3221           found |= CATEGORY_MASK_ISO_8_ELSE;
3222           goto check_extra_latin;
3223
3224         case ISO_CODE_SS2:
3225         case ISO_CODE_SS3:
3226           /* Single shift.   */
3227           if (inhibit_iso_escape_detection)
3228             break;
3229           single_shifting = 0;
3230           rejected |= CATEGORY_MASK_ISO_7BIT;
3231           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3232               & CODING_ISO_FLAG_SINGLE_SHIFT)
3233             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
3234           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3235               & CODING_ISO_FLAG_SINGLE_SHIFT)
3236             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3237           if (single_shifting)
3238             break;
3239           goto check_extra_latin;
3240
3241         default:
3242           if (c < 0)
3243             continue;
3244           if (c < 0x80)
3245             {
3246               if (composition_count >= 0)
3247                 composition_count++;
3248               single_shifting = 0;
3249               break;
3250             }
3251           if (c >= 0xA0)
3252             {
3253               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3254               found |= CATEGORY_MASK_ISO_8_1;
3255               /* Check the length of succeeding codes of the range
3256                  0xA0..0xFF.  If the byte length is even, we include
3257                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3258                  only when we are not single shifting.  */
3259               if (! single_shifting
3260                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3261                 {
3262                   int i = 1;
3263                   while (src < src_end)
3264                     {
3265                       src_base = src;
3266                       ONE_MORE_BYTE (c);
3267                       if (c < 0xA0)
3268                         {
3269                           src = src_base;
3270                           break;
3271                         }
3272                       i++;
3273                     }
3274
3275                   if (i & 1 && src < src_end)
3276                     {
3277                       rejected |= CATEGORY_MASK_ISO_8_2;
3278                       if (composition_count >= 0)
3279                         composition_count += i;
3280                     }
3281                   else
3282                     {
3283                       found |= CATEGORY_MASK_ISO_8_2;
3284                       if (composition_count >= 0)
3285                         composition_count += i / 2;
3286                     }
3287                 }
3288               break;
3289             }
3290         check_extra_latin:
3291           single_shifting = 0;
3292           if (! VECTORP (Vlatin_extra_code_table)
3293               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3294             {
3295               rejected = CATEGORY_MASK_ISO;
3296               break;
3297             }
3298           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3299               & CODING_ISO_FLAG_LATIN_EXTRA)
3300             found |= CATEGORY_MASK_ISO_8_1;
3301           else
3302             rejected |= CATEGORY_MASK_ISO_8_1;
3303           rejected |= CATEGORY_MASK_ISO_8_2;
3304         }
3305     }
3306   detect_info->rejected |= CATEGORY_MASK_ISO;
3307   return 0;
3308
3309  no_more_source:
3310   detect_info->rejected |= rejected;
3311   detect_info->found |= (found & ~rejected);
3312   return 1;
3313 }
3314
3315
3316 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
3317    escape sequence should be kept.

        REG is the graphic register to designate; DIM, CHARS_96 and FINAL
        are the arguments given to ISO_CHARSET_TABLE to look up the
        charset ID.  CHARS_96 must be an lvalue because it may be
        assigned.  The expansion refers to a variable `coding' that must
        be in scope at the point of use.

        When FINAL is below '0' or not below 128, when the table lookup
        yields a negative ID, or when SAFE_CHARSET_P rejects the ID for
        CODING, the designation of REG is set to -2, CHARS_96 is set to
        -1, and the `break' leaves the macro's own do-while wrapper.

        Otherwise the ID is stored as the designation of REG, except
        that charset_jisx0201_roman is replaced by charset_ascii when
        CODING_ISO_FLAG_USE_ROMAN is set, and charset_jisx0208_1978 is
        replaced by charset_jisx0208 when CODING_ISO_FLAG_USE_OLDJIS is
        set.  */
3318 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3319   do {                                                                  \
3320     int id, prev;                                                       \
3321                                                                         \
3322     if (final < '0' || final >= 128                                     \
3323         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
3324         || !SAFE_CHARSET_P (coding, id))                                \
3325       {                                                                 \
3326         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
3327         chars_96 = -1;                                                  \
3328         break;                                                          \
3329       }                                                                 \
3330     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
3331     if (id == charset_jisx0201_roman)                                   \
3332       {                                                                 \
3333         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
3334           id = charset_ascii;                                           \
3335       }                                                                 \
3336     else if (id == charset_jisx0208_1978)                               \
3337       {                                                                 \
3338         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3339           id = charset_jisx0208;                                        \
3340       }                                                                 \
3341     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3342     /* If there was an invalid designation to REG previously, and this  \
3343        designation is ASCII to REG, we should keep this designation     \
3344        sequence.  */                                                    \
3345     if (prev == -2 && id == charset_ascii)                              \
3346       chars_96 = -1;                                                    \
3347   } while (0)
3348
3349
3350 /* Handle these composition sequences (ALT: alternate char):
3351
3352    (1) relative composition: ESC 0 CHAR ... ESC 1
3353    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3354    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3355    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3356
3357    When the start sequence (ESC 0/2/3/4) is found, this annotation
3358    header is produced.
3359
3360         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3361
3362    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3363    produced until the end sequence (ESC 1) is found:
3364
3365    (1) CHAR ... CHAR
3366    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3367    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3368    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3369
3370    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3371    annotation header are updated as below:
3372
3373    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3374    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3375    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3376    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3377
3378    If an error is found while composing, the annotation header is
3379    changed to:
3380
3381         [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3382
3383    and the sequence [ -2 DECODED-RULE ] is changed to the original
3384    byte sequence as below:
3385         o the original byte sequence is B: [ B -1 ]
3386         o the original byte sequence is B1 B2: [ B1 B2 ]
3387    and the sequence [ -1 -1 ] is changed to the original byte
3388    sequence:
3389         [ ESC '0' ]
3390 */
3391
3392 /* Decode a composition rule C1 and maybe one more byte from the
3393    source, and set RULE to the encoded composition rule, NBYTES to the
3394    length of the composition rule.  If the rule is invalid, set RULE
3395    to some negative value.

        C1 is not a macro argument: the expansion reads a variable `c1'
        from the enclosing scope, and it uses ONE_MORE_BYTE to fetch the
        second byte of a two-byte rule.

        With R = C1 - 32:
          o R < 0: RULE keeps that negative value and NBYTES is not
            assigned.
          o R < 81 (one-byte format): gref and nref are R / 9 and R % 9,
            each with the value 4 replaced by 10, and NBYTES is 1.
          o otherwise (two-byte format): COMPOSITION_ENCODE_RULE is given
            R - 81 and the next source byte minus 32; 0x100 is added to
            a non-negative result so that it can be told apart from a
            one-byte rule, and NBYTES is 2.  */
3396
3397 #define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
3398   do {                                                                  \
3399     rule = c1 - 32;                                                     \
3400     if (rule < 0)                                                       \
3401       break;                                                            \
3402     if (rule < 81)              /* old format (before ver.21) */        \
3403       {                                                                 \
3404         int gref = (rule) / 9;                                          \
3405         int nref = (rule) % 9;                                          \
3406         if (gref == 4) gref = 10;                                       \
3407         if (nref == 4) nref = 10;                                       \
3408         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
3409         nbytes = 1;                                                     \
3410       }                                                                 \
3411     else                        /* new format (after ver.21) */         \
3412       {                                                                 \
3413         int c;                                                          \
3414                                                                         \
3415         ONE_MORE_BYTE (c);                                              \
3416         rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32);             \
3417         if (rule >= 0)                                                  \
3418           rule += 0x100;   /* to distinguish it from the old format */  \
3419         nbytes = 2;                                                     \
3420       }                                                                 \
3421   } while (0)
3422
     /* Turn the decoded composition rule RULE back into the byte(s) it
        was decoded from, storing them in charbuf[idx] and
        charbuf[idx + 1] and adding the number of bytes to new_chars.

        The expansion uses the variables `charbuf', `idx' and `new_chars'
        of the enclosing function.  RULE is evaluated several times, but
        always before charbuf[idx] or charbuf[idx + 1] is written, so
        passing charbuf[idx + 1] as RULE (as finish_composition does) is
        safe.

        gref and nref are (RULE % 0x100) / 12 and (RULE % 0x100) % 12.
          o RULE < 0x100 (one-byte format): the value 10 is mapped back
            to 4, the byte 32 + gref * 9 + nref is stored, and the second
            slot is filled with -1.
          o otherwise (two-byte format): the bytes 32 + 81 + gref and
            32 + nref are stored.  */
3423 #define ENCODE_COMPOSITION_RULE(rule)                           \
3424   do {                                                          \
3425     int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3426                                                                 \
3427     if (rule < 0x100)           /* old format */                \
3428       {                                                         \
3429         if (gref == 10) gref = 4;                               \
3430         if (nref == 10) nref = 4;                               \
3431         charbuf[idx] = 32 + gref * 9 + nref;                    \
3432         charbuf[idx + 1] = -1;                                  \
3433         new_chars++;                                            \
3434       }                                                         \
3435     else                                /* new format */        \
3436       {                                                         \
3437         charbuf[idx] = 32 + 81 + gref;                          \
3438         charbuf[idx + 1] = 32 + nref;                           \
3439         new_chars += 2;                                         \
3440       }                                                         \
3441   } while (0)
3442
3443 /* Finish the current composition as invalid.

        CHARBUF points just past the last element stored for the
        composition, and CMP_STATUS->length is the number of elements
        stored for it, so the annotation header starts at
        CHARBUF[-CMP_STATUS->length].

        The five header slots are overwritten with ESC, the digit that
        started the composition, and -2, 0, -1.  For a method that
        compares >= COMPOSITION_WITH_RULE, every [ -2 RULE ] pair after
        the header is turned back into rule bytes by
        ENCODE_COMPOSITION_RULE, and every [ -1 -1 ] pair into ESC '0'.

        Sets CMP_STATUS->state to COMPOSING_NO.  Returns
        CMP_STATUS->nchars plus the number of bytes restored for rules
        and for ESC '0' pairs; MAYBE_FINISH_COMPOSITION adds this value
        to char_offset.  */
3444
3445 static int finish_composition P_ ((int *, struct composition_status *));
3446
3447 static int
3448 finish_composition (charbuf, cmp_status)
3449      int *charbuf;
3450      struct composition_status *cmp_status;
3451 {
       /* Negative index of the first header element, relative to
          CHARBUF.  The names `idx' and `new_chars' are used by
          ENCODE_COMPOSITION_RULE and must not be renamed.  */
3452   int idx = - cmp_status->length;
3453   int new_chars;
3454
3455   /* Recover the original ESC sequence */
3456   charbuf[idx++] = ISO_CODE_ESC;
3457   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3458                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3459                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3460                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3461                     : '4');
       /* Fill the three remaining header slots.  NOTE(review): the
          meaning of -2, 0, -1 is defined by whatever consumes charbuf,
          which is not visible here.  */
3462   charbuf[idx++] = -2;
3463   charbuf[idx++] = 0;
3464   charbuf[idx++] = -1;
3465   new_chars = cmp_status->nchars;
       /* Only these methods can contain [ -2 RULE ] or [ -1 -1 ] pairs.
          NOTE(review): relies on COMPOSITION_RELATIVE being the only
          method ordered below COMPOSITION_WITH_RULE -- confirm.  */
3466   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3467     for (; idx < 0; idx++)
3468       {
3469         int elt = charbuf[idx];
3470
3471         if (elt == -2)
3472           {
                   /* Rewrites charbuf[idx] and charbuf[idx + 1]; the extra
                      idx++ skips the second slot.  */
3473             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3474             idx++;
3475           }
3476         else if (elt == -1)
3477           {
                   /* [ -1 -1 ] separator: restore it as ESC '0'.  */
3478             charbuf[idx++] = ISO_CODE_ESC;
3479             charbuf[idx] = '0';
3480             new_chars += 2;
3481           }
3482       }
3483   cmp_status->state = COMPOSING_NO;
3484   return new_chars;
3485 }
3486
3487 /* If characters are under composition, finish the composition.

        That is, when cmp_status->state is not COMPOSING_NO, call
        finish_composition and add its return value to char_offset.
        The expansion uses the variables `cmp_status', `charbuf' and
        `char_offset' of the enclosing function.  */
3488 #define MAYBE_FINISH_COMPOSITION()                              \
3489   do {                                                          \
3490     if (cmp_status->state != COMPOSING_NO)                      \
3491       char_offset += finish_composition (charbuf, cmp_status);  \
3492   } while (0)
3493
3494 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3495
3496    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3497    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3498    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3499    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3500
3501    Produce this annotation sequence now:
3502
3503    [ -LENGTH(==-4) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) METHOD ]

        NOTE(review): the general composition comment earlier in this
        file shows a five-element header (-LENGTH == -5, with a 0 before
        METHOD), and the code below sets cmp_status->length to
        MAX_ANNOTATION_LENGTH; the four-element form above may be stale --
        confirm against ADD_COMPOSITION_DATA.

        C1 is the byte that followed ESC.  The expansion uses the
        variables `cmp_status', `charbuf' and `coding' of the enclosing
        function, and `char_offset' through MAYBE_FINISH_COMPOSITION.

        If C1 is '0' while the state is COMPOSING_COMPONENT_CHAR with
        method COMPOSITION_WITH_ALTCHARS, or COMPOSING_COMPONENT_RULE
        with method COMPOSITION_WITH_RULE_ALTCHARS, the ESC 0 is the
        separator inside an ESC 3 / ESC 4 composition: [ -1 -1 ] is
        stored, the state becomes COMPOSING_CHAR and the length grows
        by 2.

        Otherwise a new composition starts: a composition still in
        progress is finished as invalid, the method is derived from C1,
        the state becomes COMPOSING_CHAR when C1 <= '2' and
        COMPOSING_COMPONENT_CHAR otherwise, the annotation header is
        stored, length is set to MAX_ANNOTATION_LENGTH, nchars and
        ncomps are reset to 0, and coding->annotated is set to 1.
3504 */
3505
3506 #define DECODE_COMPOSITION_START(c1)                                       \
3507   do {                                                                     \
3508     if (c1 == '0'                                                          \
3509         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
3510              && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
3511             || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
3512                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3513       {                                                                    \
3514         *charbuf++ = -1;                                                   \
3515         *charbuf++= -1;                                                    \
3516         cmp_status->state = COMPOSING_CHAR;                                \
3517         cmp_status->length += 2;                                           \
3518       }                                                                    \
3519     else                                                                   \
3520       {                                                                    \
3521         MAYBE_FINISH_COMPOSITION ();                                       \
3522         cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE             \
3523                               : c1 == '2' ? COMPOSITION_WITH_RULE          \
3524                               : c1 == '3' ? COMPOSITION_WITH_ALTCHARS      \
3525                               : COMPOSITION_WITH_RULE_ALTCHARS);           \
3526         cmp_status->state                                                  \
3527           = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);       \
3528         ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
3529         cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
3530         cmp_status->nchars = cmp_status->ncomps = 0;                       \
3531         coding->annotated = 1;                                             \
3532       }                                                                    \
3533   } while (0)
3534
3535
3536 /* Handle composition end sequence ESC 1.

        The expansion uses the variables `cmp_status', `charbuf' and
        `char_offset' of the enclosing function, and jumps to a label
        `invalid_code' that the enclosing function must provide.

        The end sequence is rejected when no CHAR has been stored
        (nchars == 0), when the method is COMPOSITION_WITH_RULE and the
        state is COMPOSING_CHAR, or when the method is anything else and
        the state is not COMPOSING_CHAR.  In that case a composition in
        progress is finished as invalid before the jump.

        Otherwise the annotation header at charbuf[-cmp_status->length]
        is completed: the negative LENGTH slot is decreased by ncomps + 2
        for COMPOSITION_WITH_ALTCHARS or by ncomps * 3 for
        COMPOSITION_WITH_RULE_ALTCHARS, the slot at header index 2
        (NCHARS) is set to nchars, char_offset is advanced by nchars, and
        the state is reset to COMPOSING_NO.  */
3537
3538 #define DECODE_COMPOSITION_END()                                        \
3539   do {                                                                  \
3540     if (cmp_status->nchars == 0                                         \
3541         || ((cmp_status->state == COMPOSING_CHAR)                       \
3542             == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
3543       {                                                                 \
3544         MAYBE_FINISH_COMPOSITION ();                                    \
3545         goto invalid_code;                                              \
3546       }                                                                 \
3547     if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
3548       charbuf[- cmp_status->length] -= cmp_status->ncomps + 2;          \
3549     else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
3550       charbuf[- cmp_status->length] -= cmp_status->ncomps * 3;          \
3551     charbuf[- cmp_status->length + 2] = cmp_status->nchars;             \
3552     char_offset += cmp_status->nchars;                                  \
3553     cmp_status->state = COMPOSING_NO;                                   \
3554   } while (0)
3555
3556 /* Store a composition rule RULE in charbuf, and update cmp_status.

        Appends the marker -2 followed by RULE, adds 2 to
        cmp_status->length and decrements cmp_status->state.  The
        expansion uses the variables `charbuf' and `cmp_status' of the
        enclosing function.
        NOTE(review): the decrement presumably steps from a rule state
        back to the matching char state; that depends on the order of
        the state enumeration, which is not visible here -- confirm.  */
3557
3558 #define STORE_COMPOSITION_RULE(rule)    \
3559   do {                                  \
3560     *charbuf++ = -2;                    \
3561     *charbuf++ = rule;                  \
3562     cmp_status->length += 2;            \
3563     cmp_status->state--;                \
3564   } while (0)
3565
3566 /* Store a composed char or a component char C in charbuf, and update
3567    cmp_status.

        Appends C and adds 1 to cmp_status->length.  C is counted in
        nchars when the state is COMPOSING_CHAR and in ncomps otherwise.
        The state is then incremented when the method is
        COMPOSITION_WITH_RULE, or when the method is
        COMPOSITION_WITH_RULE_ALTCHARS and the state is
        COMPOSING_COMPONENT_CHAR; in every other case it is left alone.
        The expansion uses the variables `charbuf' and `cmp_status' of
        the enclosing function.
        NOTE(review): the increment presumably moves to the state that
        expects a rule next; that depends on the order of the state
        enumeration, which is not visible here -- confirm.  */
3568
3569 #define STORE_COMPOSITION_CHAR(c)                                       \
3570   do {                                                                  \
3571     *charbuf++ = (c);                                                   \
3572     cmp_status->length++;                                               \
3573     if (cmp_status->state == COMPOSING_CHAR)                            \
3574       cmp_status->nchars++;                                             \
3575     else                                                                \
3576       cmp_status->ncomps++;                                             \
3577     if (cmp_status->method == COMPOSITION_WITH_RULE                     \
3578         || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS        \
3579             && cmp_status->state == COMPOSING_COMPONENT_CHAR))          \
3580       cmp_status->state++;                                              \
3581   } while (0)
3582
3583
3584 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3585
3586 static void
3587 decode_coding_iso_2022 (coding)
3588      struct coding_system *coding;
3589 {
3590   const unsigned char *src = coding->source + coding->consumed;
3591   const unsigned char *src_end = coding->source + coding->src_bytes;
3592   const unsigned char *src_base;
3593   int *charbuf = coding->charbuf + coding->charbuf_used;
3594   /* We may produce two annotations (charset and composition) in one
3595      loop and one more charset annotation at the end.  */
3596   int *charbuf_end
3597     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3598   int consumed_chars = 0, consumed_chars_base;
3599   int multibytep = coding->src_multibyte;
3600   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3601   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3602   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3603   int charset_id_2, charset_id_3;
3604   struct charset *charset;
3605   int c;
3606   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3607   Lisp_Object attrs, charset_list;
3608   int char_offset = coding->produced_char;
3609   int last_offset = char_offset;
3610   int last_id = charset_ascii;
3611   int eol_crlf =
3612     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3613   int byte_after_cr = -1;
3614   int i;
3615
3616   CODING_GET_INFO (coding, attrs, charset_list);
3617   setup_iso_safe_charsets (attrs);
3618   /* Charset list may have been changed.  */
3619   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3620   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3621
3622   if (cmp_status->state != COMPOSING_NO)
3623     {
3624       for (i = 0; i < cmp_status->length; i++)
3625         *charbuf++ = cmp_status->carryover[i];
3626       coding->annotated = 1;
3627     }
3628
3629   while (1)
3630     {
3631       int c1, c2, c3;
3632
3633       src_base = src;
3634       consumed_chars_base = consumed_chars;
3635
3636       if (charbuf >= charbuf_end)
3637         {
3638           if (byte_after_cr >= 0)
3639             src_base--;
3640           break;
3641         }
3642
3643       if (byte_after_cr >= 0)
3644         c1 = byte_after_cr, byte_after_cr = -1;
3645       else
3646         ONE_MORE_BYTE (c1);
3647       if (c1 < 0)
3648         goto invalid_code;
3649
3650       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3651         {
3652           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3653           char_offset++;
3654           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3655           continue;
3656         }
3657
3658       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3659         {
3660           if (c1 == ISO_CODE_ESC)
3661             {
3662               if (src + 1 >= src_end)
3663                 goto no_more_source;
3664               *charbuf++ = ISO_CODE_ESC;
3665               char_offset++;
3666               if (src[0] == '%' && src[1] == '@')
3667                 {
3668                   src += 2;
3669                   consumed_chars += 2;
3670                   char_offset += 2;
3671                   /* We are sure charbuf can contain two more chars. */
3672                   *charbuf++ = '%';
3673                   *charbuf++ = '@';
3674                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3675                 }
3676             }
3677           else
3678             {
3679               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3680               char_offset++;
3681             }
3682           continue;
3683         }
3684
3685       if ((cmp_status->state == COMPOSING_RULE
3686            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3687           && c1 != ISO_CODE_ESC)
3688         {
3689           int rule, nbytes;
3690
3691           DECODE_COMPOSITION_RULE (rule, nbytes);
3692           if (rule < 0)
3693             goto invalid_code;
3694           STORE_COMPOSITION_RULE (rule);
3695           continue;
3696         }
3697
3698       /* We produce at most one character.  */
3699       switch (iso_code_class [c1])
3700         {
3701         case ISO_0x20_or_0x7F:
3702           if (charset_id_0 < 0
3703               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3704             /* This is SPACE or DEL.  */
3705             charset = CHARSET_FROM_ID (charset_ascii);
3706           else
3707             charset = CHARSET_FROM_ID (charset_id_0);
3708           break;
3709
3710         case ISO_graphic_plane_0:
3711           if (charset_id_0 < 0)
3712             charset = CHARSET_FROM_ID (charset_ascii);
3713           else
3714             charset = CHARSET_FROM_ID (charset_id_0);
3715           break;
3716
3717         case ISO_0xA0_or_0xFF:
3718           if (charset_id_1 < 0
3719               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3720               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3721             goto invalid_code;
3722           /* This is a graphic character, we fall down ... */
3723
3724         case ISO_graphic_plane_1:
3725           if (charset_id_1 < 0)
3726             goto invalid_code;
3727           charset = CHARSET_FROM_ID (charset_id_1);
3728           break;
3729
3730         case ISO_control_0:
3731           if (eol_crlf && c1 == '\r')
3732             ONE_MORE_BYTE (byte_after_cr);
3733           MAYBE_FINISH_COMPOSITION ();
3734           charset = CHARSET_FROM_ID (charset_ascii);
3735           break;
3736
3737         case ISO_control_1:
3738           goto invalid_code;
3739
3740         case ISO_shift_out:
3741           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3742               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3743             goto invalid_code;
3744           CODING_ISO_INVOCATION (coding, 0) = 1;
3745           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3746           continue;
3747
3748         case ISO_shift_in:
3749           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3750             goto invalid_code;
3751           CODING_ISO_INVOCATION (coding, 0) = 0;
3752           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3753           continue;
3754
3755         case ISO_single_shift_2_7:
3756           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3757             goto invalid_code;
3758         case ISO_single_shift_2:
3759           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3760             goto invalid_code;
3761           /* SS2 is handled as an escape sequence of ESC 'N' */
3762           c1 = 'N';
3763           goto label_escape_sequence;
3764
3765         case ISO_single_shift_3:
3766           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3767             goto invalid_code;
3768           /* SS3 is handled as an escape sequence of ESC 'O' */
3769           c1 = 'O';
3770           goto label_escape_sequence;
3771
3772         case ISO_control_sequence_introducer:
3773           /* CSI is handled as an escape sequence of ESC '[' ...  */
3774           c1 = '[';
3775           goto label_escape_sequence;
3776
3777         case ISO_escape:
3778           ONE_MORE_BYTE (c1);
3779         label_escape_sequence:
3780           /* Escape sequences handled here are invocation,
3781              designation, direction specification, and character
3782              composition specification.  */
3783           switch (c1)
3784             {
3785             case '&':           /* revision of following character set */
3786               ONE_MORE_BYTE (c1);
3787               if (!(c1 >= '@' && c1 <= '~'))
3788                 goto invalid_code;
3789               ONE_MORE_BYTE (c1);
3790               if (c1 != ISO_CODE_ESC)
3791                 goto invalid_code;
3792               ONE_MORE_BYTE (c1);
3793               goto label_escape_sequence;
3794
3795             case '$':           /* designation of 2-byte character set */
3796               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3797                 goto invalid_code;
3798               {
3799                 int reg, chars96;
3800
3801                 ONE_MORE_BYTE (c1);
3802                 if (c1 >= '@' && c1 <= 'B')
3803                   {     /* designation of JISX0208.1978, GB2312.1980,
3804                            or JISX0208.1980 */
3805                     reg = 0, chars96 = 0;
3806                   }
3807                 else if (c1 >= 0x28 && c1 <= 0x2B)
3808                   { /* designation of DIMENSION2_CHARS94 character set */
3809                     reg = c1 - 0x28, chars96 = 0;
3810                     ONE_MORE_BYTE (c1);
3811                   }
3812                 else if (c1 >= 0x2C && c1 <= 0x2F)
3813                   { /* designation of DIMENSION2_CHARS96 character set */
3814                     reg = c1 - 0x2C, chars96 = 1;
3815                     ONE_MORE_BYTE (c1);
3816                   }
3817                 else
3818                   goto invalid_code;
3819                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3820                 /* We must update these variables now.  */
3821                 if (reg == 0)
3822                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3823                 else if (reg == 1)
3824                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3825                 if (chars96 < 0)
3826                   goto invalid_code;
3827               }
3828               continue;
3829
3830             case 'n':           /* invocation of locking-shift-2 */
3831               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3832                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3833                 goto invalid_code;
3834               CODING_ISO_INVOCATION (coding, 0) = 2;
3835               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3836               continue;
3837
3838             case 'o':           /* invocation of locking-shift-3 */
3839               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3840                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3841                 goto invalid_code;
3842               CODING_ISO_INVOCATION (coding, 0) = 3;
3843               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3844               continue;
3845
3846             case 'N':           /* invocation of single-shift-2 */
3847               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3848                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3849                 goto invalid_code;
3850               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3851               if (charset_id_2 < 0)
3852                 charset = CHARSET_FROM_ID (charset_ascii);
3853               else
3854                 charset = CHARSET_FROM_ID (charset_id_2);
3855               ONE_MORE_BYTE (c1);
3856               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3857                 goto invalid_code;
3858               break;
3859
3860             case 'O':           /* invocation of single-shift-3 */
3861               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3862                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3863                 goto invalid_code;
3864               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3865               if (charset_id_3 < 0)
3866                 charset = CHARSET_FROM_ID (charset_ascii);
3867               else
3868                 charset = CHARSET_FROM_ID (charset_id_3);
3869               ONE_MORE_BYTE (c1);
3870               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3871                 goto invalid_code;
3872               break;
3873
3874             case '0': case '2': case '3': case '4': /* start composition */
3875               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3876                 goto invalid_code;
3877               if (last_id != charset_ascii)
3878                 {
3879                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3880                   last_id = charset_ascii;
3881                   last_offset = char_offset;
3882                 }
3883               DECODE_COMPOSITION_START (c1);
3884               continue;
3885
3886             case '1':           /* end composition */
3887               if (cmp_status->state == COMPOSING_NO)
3888                 goto invalid_code;
3889               DECODE_COMPOSITION_END ();
3890               continue;
3891
3892             case '[':           /* specification of direction */
3893               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3894                 goto invalid_code;
3895               /* For the moment, nested direction is not supported.
3896                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3897                  left-to-right, and nozero means right-to-left.  */
3898               ONE_MORE_BYTE (c1);
3899               switch (c1)
3900                 {
3901                 case ']':       /* end of the current direction */
3902                   coding->mode &= ~CODING_MODE_DIRECTION;
3903
3904                 case '0':       /* end of the current direction */
3905                 case '1':       /* start of left-to-right direction */
3906                   ONE_MORE_BYTE (c1);
3907                   if (c1 == ']')
3908                     coding->mode &= ~CODING_MODE_DIRECTION;
3909                   else
3910                     goto invalid_code;
3911                   break;
3912
3913                 case '2':       /* start of right-to-left direction */
3914                   ONE_MORE_BYTE (c1);
3915                   if (c1 == ']')
3916                     coding->mode |= CODING_MODE_DIRECTION;
3917                   else
3918                     goto invalid_code;
3919                   break;
3920
3921                 default:
3922                   goto invalid_code;
3923                 }
3924               continue;
3925
3926             case '%':
3927               ONE_MORE_BYTE (c1);
3928               if (c1 == '/')
3929                 {
3930                   /* CTEXT extended segment:
3931                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3932                      We keep these bytes as is for the moment.
3933                      They may be decoded by post-read-conversion.  */
3934                   int dim, M, L;
3935                   int size;
3936
3937                   ONE_MORE_BYTE (dim);
3938                   if (dim < 0 || dim > 4)
3939                     goto invalid_code;
3940                   ONE_MORE_BYTE (M);
3941                   if (M < 128)
3942                     goto invalid_code;
3943                   ONE_MORE_BYTE (L);
3944                   if (L < 128)
3945                     goto invalid_code;
3946                   size = ((M - 128) * 128) + (L - 128);
3947                   if (charbuf + 6 > charbuf_end)
3948                     goto break_loop;
3949                   *charbuf++ = ISO_CODE_ESC;
3950                   *charbuf++ = '%';
3951                   *charbuf++ = '/';
3952                   *charbuf++ = dim;
3953                   *charbuf++ = BYTE8_TO_CHAR (M);
3954                   *charbuf++ = BYTE8_TO_CHAR (L);
3955                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3956                 }
3957               else if (c1 == 'G')
3958                 {
3959                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3960                      ESC % G --UTF-8-BYTES-- ESC % @
3961                      We keep these bytes as is for the moment.
3962                      They may be decoded by post-read-conversion.  */
3963                   if (charbuf + 3 > charbuf_end)
3964                     goto break_loop;
3965                   *charbuf++ = ISO_CODE_ESC;
3966                   *charbuf++ = '%';
3967                   *charbuf++ = 'G';
3968                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3969                 }
3970               else
3971                 goto invalid_code;
3972               continue;
3973               break;
3974
3975             default:
3976               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3977                 goto invalid_code;
3978               {
3979                 int reg, chars96;
3980
3981                 if (c1 >= 0x28 && c1 <= 0x2B)
3982                   { /* designation of DIMENSION1_CHARS94 character set */
3983                     reg = c1 - 0x28, chars96 = 0;
3984                     ONE_MORE_BYTE (c1);
3985                   }
3986                 else if (c1 >= 0x2C && c1 <= 0x2F)
3987                   { /* designation of DIMENSION1_CHARS96 character set */
3988                     reg = c1 - 0x2C, chars96 = 1;
3989                     ONE_MORE_BYTE (c1);
3990                   }
3991                 else
3992                   goto invalid_code;
3993                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3994                 /* We must update these variables now.  */
3995                 if (reg == 0)
3996                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3997                 else if (reg == 1)
3998                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3999                 if (chars96 < 0)
4000                   goto invalid_code;
4001               }
4002               continue;
4003             }
4004         }
4005
4006       if (cmp_status->state == COMPOSING_NO
4007           && charset->id != charset_ascii
4008           && last_id != charset->id)
4009         {
4010           if (last_id != charset_ascii)
4011             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4012           last_id = charset->id;
4013           last_offset = char_offset;
4014         }
4015
4016       /* Now we know CHARSET and 1st position code C1 of a character.
4017          Produce a decoded character while getting 2nd and 3rd
4018          position codes C2, C3 if necessary.  */
4019       if (CHARSET_DIMENSION (charset) > 1)
4020         {
4021           ONE_MORE_BYTE (c2);
4022           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
4023               || ((c1 & 0x80) != (c2 & 0x80)))
4024             /* C2 is not in a valid range.  */
4025             goto invalid_code;
4026           if (CHARSET_DIMENSION (charset) == 2)
4027             c1 = (c1 << 8) | c2;
4028           else
4029             {
4030               ONE_MORE_BYTE (c3);
4031               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
4032                   || ((c1 & 0x80) != (c3 & 0x80)))
4033                 /* C3 is not in a valid range.  */
4034                 goto invalid_code;
4035               c1 = (c1 << 16) | (c2 << 8) | c2;
4036             }
4037         }
4038       c1 &= 0x7F7F7F;
4039       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4040       if (c < 0)
4041         {
4042           MAYBE_FINISH_COMPOSITION ();
4043           for (; src_base < src; src_base++, char_offset++)
4044             {
4045               if (ASCII_BYTE_P (*src_base))
4046                 *charbuf++ = *src_base;
4047               else
4048                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4049             }
4050         }
4051       else if (cmp_status->state == COMPOSING_NO)
4052         {
4053           *charbuf++ = c;
4054           char_offset++;
4055         }
4056       else if ((cmp_status->state == COMPOSING_CHAR
4057                 ? cmp_status->nchars
4058                 : cmp_status->ncomps)
4059                >= MAX_COMPOSITION_COMPONENTS)
4060         {
4061           /* Too long composition.  */
4062           MAYBE_FINISH_COMPOSITION ();
4063           *charbuf++ = c;
4064           char_offset++;
4065         }
4066       else
4067         STORE_COMPOSITION_CHAR (c);
4068       continue;
4069
4070     invalid_code:
4071       MAYBE_FINISH_COMPOSITION ();
4072       src = src_base;
4073       consumed_chars = consumed_chars_base;
4074       ONE_MORE_BYTE (c);
4075       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4076       char_offset++;
4077       coding->errors++;
4078       continue;
4079
4080     break_loop:
4081       break;
4082     }
4083
4084  no_more_source:
4085   if (cmp_status->state != COMPOSING_NO)
4086     {
4087       if (coding->mode & CODING_MODE_LAST_BLOCK)
4088         MAYBE_FINISH_COMPOSITION ();
4089       else
4090         {
4091           charbuf -= cmp_status->length;
4092           for (i = 0; i < cmp_status->length; i++)
4093             cmp_status->carryover[i] = charbuf[i];
4094         }
4095     }
4096   else if (last_id != charset_ascii)
4097     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4098   coding->consumed_char += consumed_chars_base;
4099   coding->consumed = src_base - coding->source;
4100   coding->charbuf_used = charbuf - coding->charbuf;
4101 }
4102
4103
4104 /* ISO2022 encoding stuff.  */
4105
/*
   Saying just "ISO2022" is not enough for encoding; we have to
   specify more details.  In Emacs, each coding system of the ISO2022
   variant has the following specifications:
        1. Initial designation to G0 through G3.
        2. Allows short-form designation?
        3. ASCII should be designated to G0 before control characters?
        4. ASCII should be designated to G0 at end of line?
        5. 7-bit environment or 8-bit environment?
        6. Use locking-shift?
        7. Use single-shift?
   And the following two are only for Japanese:
        8. Use ASCII in place of JISX0201-1976-Roman?
        9. Use JISX0208-1983 in place of JISX0208-1978?
   These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
   defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
   details.
*/
4124
/* Emit, at DST, the escape sequence that designates CHARSET to graphic
   register REG, increment DST, and record the new designation in
   CODING.  The bytes are produced in this order:

     ESC & <'@' + revision>   only when CODING has
                              CODING_ISO_FLAG_REVISION set and the
                              revision number of CHARSET is not
                              negative;
     ESC                      always;
     '$'                      only when the dimension of CHARSET is
                              not 1;
     <intermediate-char>      taken from "()*+" (94-char sets) or
                              ",-./" (96-char sets) at index REG;
     <final-char>             the ISO final byte of CHARSET.

   For a multi-dimension 94-char set the intermediate character is
   left out (short form) when REG is 0, <final-char> is '@', 'A', or
   'B', and CODING does not have CODING_ISO_FLAG_LONG_FORM set.

   REG indexes the four-character tables above, so it is expected to
   be in the range 0..3.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const char *inter_94 = "()*+";                                      \
    const char *inter_96 = ",-./";                                      \
    unsigned char final_byte = CHARSET_ISO_FINAL (charset);             \
    int is_96 = CHARSET_ISO_CHARS_96 (charset) != 0;                    \
    int rev = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)   \
               ? CHARSET_ISO_REVISION (charset)                         \
               : -1);                                                   \
                                                                        \
    /* Optional revision announcement.  */                              \
    if (rev >= 0)                                                       \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + rev);                                      \
      }                                                                 \
                                                                        \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        if (is_96)                                                      \
          {                                                             \
            EMIT_ONE_ASCII_BYTE (inter_96[reg]);                        \
          }                                                             \
        else if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM) \
                 || reg != 0                                            \
                 || final_byte < '@' || final_byte > 'B')               \
          {                                                             \
            /* The short form is not applicable.  */                    \
            EMIT_ONE_ASCII_BYTE (inter_94[reg]);                        \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        int inter = (is_96 ? inter_96[reg] : inter_94[reg]);            \
                                                                        \
        EMIT_ONE_ASCII_BYTE (inter);                                    \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (final_byte);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
4172
4173
/* The following two macros emit the ISO2022 single-shift functions
   single-shift-2 and single-shift-3.  When CODING has
   CODING_ISO_FLAG_SEVEN_BITS set, the function is emitted as the
   escape sequence ESC N (resp. ESC O); otherwise it is emitted as the
   single byte ISO_CODE_SS2 (resp. ISO_CODE_SS3).  Either way CODING
   is then marked as single-shifting, which the character-producing
   macros check before emitting the next character.

   Both macros take no argument and refer to a variable named `coding'
   at the place where they are expanded.  */

#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS2);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)


#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
4196
4197
/* The following four macros produce codes (control character or
   escape sequence) for ISO2022 locking-shift functions and record in
   CODING which graphic register is now invoked to graphic plane 0:

     ENCODE_SHIFT_IN          ISO_CODE_SI   register 0
     ENCODE_SHIFT_OUT         ISO_CODE_SO   register 1
     ENCODE_LOCKING_SHIFT_2   ESC n         register 2
     ENCODE_LOCKING_SHIFT_3   ESC o         register 3

   They take no argument and refer to a variable named `coding' at the
   place where they are expanded.  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
    CODING_ISO_INVOCATION (coding, 0) = 0;              \
  } while (0)


#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
    CODING_ISO_INVOCATION (coding, 0) = 1;              \
  } while (0)


#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
    CODING_ISO_INVOCATION (coding, 0) = 2;              \
  } while (0)


/* Locking-shift-3 is ESC o.  The decoder in this file reads ESC n as
   locking-shift-2 and ESC o as locking-shift-3, so emitting 'n' here
   would make the produced bytes disagree with the invocation that is
   recorded below.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
4228
4229
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be a modifiable lvalue: when CODING has
   CODING_ISO_FLAG_USE_ROMAN set and CHARSET is ASCII, CHARSET is
   replaced by the charset whose ID is charset_jisx0201_roman.

   The byte for the character is chosen as follows:
     - if CODING is single-shifting, C1 is emitted with the 8th bit
       cleared (CODING_ISO_FLAG_SEVEN_BITS set) or set (otherwise), and
       the single-shifting state is cleared;
     - else if CHARSET is invoked to graphic plane 0, C1 is emitted
       with the 8th bit cleared;
     - else if CHARSET is invoked to graphic plane 1, C1 is emitted
       with the 8th bit set;
     - otherwise encode_invocation_designation is called and the
       checks are repeated.

   The macro refers to variables named `coding', `dst', and
   `produced_chars' at the place where it is expanded.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    /* C1 is parenthesized everywhere below so that an argument such   \
       as `a | b' is masked as a whole.  */                             \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
4272
4273
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   CHARSET must be a modifiable lvalue: when CODING has
   CODING_ISO_FLAG_USE_OLDJIS set and the ID of CHARSET is
   charset_jisx0208, CHARSET is replaced by the charset whose ID is
   charset_jisx0208_1978.

   The two bytes are emitted with the 8th bit cleared when CHARSET is
   reachable through graphic plane 0 (or CODING is single-shifting in
   a 7-bit environment) and with the 8th bit set when it is reachable
   through graphic plane 1 (or CODING is single-shifting otherwise).
   If CHARSET is reachable through neither,
   encode_invocation_designation is called and the checks are
   repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
    int shifted, to_gl, to_gr;                                          \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    /* Decide which half of the code table the bytes go to.  */         \
    shifted = CODING_ISO_SINGLE_SHIFTING (coding) != 0;                 \
    if (shifted)                                                        \
      {                                                                 \
        to_gl = ((CODING_ISO_FLAGS (coding)                             \
                  & CODING_ISO_FLAG_SEVEN_BITS) != 0);                  \
        to_gr = ! to_gl;                                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        to_gl = id == CODING_ISO_INVOKED_CHARSET (coding, 0);           \
        to_gr = ! to_gl && id == CODING_ISO_INVOKED_CHARSET (coding, 1); \
      }                                                                 \
                                                                        \
    if (to_gl || to_gr)                                                 \
      {                                                                 \
        if (to_gl)                                                      \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        /* A single shift affects only this one character.  */          \
        if (shifted)                                                    \
          CODING_ISO_SINGLE_SHIFTING (coding) = 0;                      \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* CHARSET is not yet invoked to any graphic plane.  Invoke it     \
       (designating it to some graphic register first if needed),      \
       then go round the loop again to actually produce the            \
       character.  */                                                   \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
4316
4317
/* Produce codes for character C of CHARSET.  C is first converted to
   a code point of CHARSET with ENCODE_CHAR.  A charset of dimension 1
   is handled by ENCODE_ISO_CHARACTER_DIMENSION1 with that code point;
   any other charset is handled by ENCODE_ISO_CHARACTER_DIMENSION2
   with the code point split into (code >> 8) and (code & 0xFF).  */

#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int position_code = ENCODE_CHAR ((charset), (c));                      \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), position_code >> 8,      \
                                       position_code & 0xFF);              \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), position_code);          \
  } while (0)
4327
4328
4329 /* Produce designation and invocation codes at a place pointed by DST
4330    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4331    Return new DST.  */
4332
4333 unsigned char *
4334 encode_invocation_designation (charset, coding, dst, p_nchars)
4335      struct charset *charset;
4336      struct coding_system *coding;
4337      unsigned char *dst;
4338      int *p_nchars;
4339 {
4340   int multibytep = coding->dst_multibyte;
4341   int produced_chars = *p_nchars;
4342   int reg;                      /* graphic register number */
4343   int id = CHARSET_ID (charset);
4344
4345   /* At first, check designations.  */
4346   for (reg = 0; reg < 4; reg++)
4347     if (id == CODING_ISO_DESIGNATION (coding, reg))
4348       break;
4349
4350   if (reg >= 4)
4351     {
4352       /* CHARSET is not yet designated to any graphic registers.  */
4353       /* At first check the requested designation.  */
4354       reg = CODING_ISO_REQUEST (coding, id);
4355       if (reg < 0)
4356         /* Since CHARSET requests no special designation, designate it
4357            to graphic register 0.  */
4358         reg = 0;
4359
4360       ENCODE_DESIGNATION (charset, reg, coding);
4361     }
4362
4363   if (CODING_ISO_INVOCATION (coding, 0) != reg
4364       && CODING_ISO_INVOCATION (coding, 1) != reg)
4365     {
4366       /* Since the graphic register REG is not invoked to any graphic
4367          planes, invoke it to graphic plane 0.  */
4368       switch (reg)
4369         {
4370         case 0:                 /* graphic register 0 */
4371           ENCODE_SHIFT_IN;
4372           break;
4373
4374         case 1:                 /* graphic register 1 */
4375           ENCODE_SHIFT_OUT;
4376           break;
4377
4378         case 2:                 /* graphic register 2 */
4379           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4380             ENCODE_SINGLE_SHIFT_2;
4381           else
4382             ENCODE_LOCKING_SHIFT_2;
4383           break;
4384
4385         case 3:                 /* graphic register 3 */
4386           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4387             ENCODE_SINGLE_SHIFT_3;
4388           else
4389             ENCODE_LOCKING_SHIFT_3;
4390           break;
4391         }
4392     }
4393
4394   *p_nchars = produced_chars;
4395   return dst;
4396 }
4397
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the control sequence
   introducer: ESC [ when CODING has CODING_ISO_FLAG_SEVEN_BITS set,
   the single byte ISO_CODE_CSI otherwise.  It is an object-like macro
   and must be used without an argument list.

   ENCODE_DIRECTION_R2L emits the introducer followed by "2]", which
   the decoder in this file reads as the start of right-to-left
   direction.  ENCODE_DIRECTION_L2R emits the introducer followed by
   "0]", which the decoder reads as the end of the current direction.

   All three refer to a variable named `coding' at the place where
   they are expanded.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    /* Test the seven-bit flag bit only; CODING_ISO_FLAGS usually has  \
       other bits set as well, so comparing the whole word for         \
       equality would pick the 8-bit form for 7-bit coding systems.  */ \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)


#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)


#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4421
4422
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state.

   If graphic plane 0 does not have graphic register 0 invoked, a
   shift-in is emitted first.  Then, for each of the four graphic
   registers that has an initial charset (CODING_ISO_INITIAL >= 0)
   different from the charset currently designated to it, the initial
   charset is designated again.  Registers without an initial charset
   are left alone.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    int g;                                                              \
    struct charset *initial_cs;                                         \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
                                                                        \
    for (g = 0; g < 4; g++)                                             \
      {                                                                 \
        int wanted = CODING_ISO_INITIAL (coding, g);                    \
                                                                        \
        /* Nothing to do when there is no initial charset or it is     \
           still the designated one.  */                                \
        if (wanted < 0 || wanted == CODING_ISO_DESIGNATION (coding, g)) \
          continue;                                                     \
                                                                        \
        initial_cs = CHARSET_FROM_ID (wanted);                          \
        ENCODE_DESIGNATION (initial_cs, g, coding);                     \
      }                                                                 \
  } while (0)
4441
4442
4443 /* Produce designation sequences of charsets in the line started from
4444    SRC to a place pointed by DST, and return updated DST.
4445
4446    If the current block ends before any end-of-line, we may fail to
4447    find all the necessary designations.  */
4448
4449 static unsigned char *
4450 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
4451      struct coding_system *coding;
4452      int *charbuf, *charbuf_end;
4453      unsigned char *dst;
4454 {
4455   struct charset *charset;
4456   /* Table of charsets to be designated to each graphic register.  */
4457   int r[4];
4458   int c, found = 0, reg;
4459   int produced_chars = 0;
4460   int multibytep = coding->dst_multibyte;
4461   Lisp_Object attrs;
4462   Lisp_Object charset_list;
4463
4464   attrs = CODING_ID_ATTRS (coding->id);
4465   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4466   if (EQ (charset_list, Qiso_2022))
4467     charset_list = Viso_2022_charset_list;
4468
4469   for (reg = 0; reg < 4; reg++)
4470     r[reg] = -1;
4471
4472   while (found < 4)
4473     {
4474       int id;
4475
4476       c = *charbuf++;
4477       if (c == '\n')
4478         break;
4479       charset = char_charset (c, charset_list, NULL);
4480       id = CHARSET_ID (charset);
4481       reg = CODING_ISO_REQUEST (coding, id);
4482       if (reg >= 0 && r[reg] < 0)
4483         {
4484           found++;
4485           r[reg] = id;
4486         }
4487     }
4488
4489   if (found)
4490     {
4491       for (reg = 0; reg < 4; reg++)
4492         if (r[reg] >= 0
4493             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4494           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4495     }
4496
4497   return dst;
4498 }
4499
4500 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
        Encode the CODING->charbuf_used elements of CODING->charbuf with
        the ISO 2022 encoder and store the result in CODING->destination
        starting at offset CODING->produced.  On the paths visible here
        the function records CODING_RESULT_SUCCESS, saves the
        beginning-of-line designation state in CODING_ISO_BOL, adds to
        CODING->produced_char, updates CODING->produced, and
        returns 0.  */
4501
4502 static int
4503 encode_coding_iso_2022 (coding)
4504      struct coding_system *coding;
4505 {
4506   int multibytep = coding->dst_multibyte;
4507   int *charbuf = coding->charbuf;
4508   int *charbuf_end = charbuf + coding->charbuf_used;
4509   unsigned char *dst = coding->destination + coding->produced;
4510   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Amount passed to ASSURE_DESTINATION before each element is
          handled.  */
4511   int safe_room = 16;
       /* Nonzero when designation sequences must be produced before the
          next character: needs both the DESIGNATE_AT_BOL flag and the
          saved beginning-of-line state.  */
4512   int bol_designation
4513     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4514        && CODING_ISO_BOL (coding));
4515   int produced_chars = 0;
4516   Lisp_Object attrs, eol_type, charset_list;
4517   int ascii_compatible;
4518   int c;
       /* Charset id taken from the most recent charset annotation, or -1
          when there is none (or it is not in CHARSET_LIST).  */
4519   int preferred_charset_id = -1;
4520
4521   CODING_GET_INFO (coding, attrs, charset_list);
       /* Fall back to Qunix when EOL conversion is inhibited or when the
          EOL type is a vector (presumably meaning "not yet decided" --
          TODO confirm).  */
4522   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4523   if (VECTORP (eol_type))
4524     eol_type = Qunix;
4525
4526   setup_iso_safe_charsets (attrs);
4527   /* Charset list may have been changed.  */
4528   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4529   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4530
4531   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4532
4533   while (charbuf < charbuf_end)
4534     {
4535       ASSURE_DESTINATION (safe_room);
4536
4537       if (bol_designation)
4538         {
4539           unsigned char *dst_prev = dst;
4540
4541           /* We have to produce designation sequences if any now.  */
4542           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4543           bol_designation = 0;
4544           /* We are sure that designation sequences are all ASCII bytes.  */
4545           produced_chars += dst - dst_prev;
4546         }
4547
4548       c = *charbuf++;
4549
4550       if (c < 0)
4551         {
4552           /* Handle an annotation.  A negative element starts an
                  annotation; CHARBUF now points at the element that
                  follows it, which selects the annotation type.  */
4553           switch (*charbuf)
4554             {
4555             case CODING_ANNOTATE_COMPOSITION_MASK:
4556               /* Not yet implemented.  */
4557               break;
4558             case CODING_ANNOTATE_CHARSET_MASK:
                   /* Use the annotated charset id as the preferred
                      charset, unless it is not a member of
                      CHARSET_LIST.  */
4559               preferred_charset_id = charbuf[2];
4560               if (preferred_charset_id >= 0
4561                   && NILP (Fmemq (make_number (preferred_charset_id),
4562                                   charset_list)))
4563                 preferred_charset_id = -1;
4564               break;
4565             default:
4566               abort ();
4567             }
               /* Skip the remaining elements of the annotation; -C is
                  its length including the element already read.  */
4568           charbuf += -c - 1;
4569           continue;
4570         }
4571
4572       /* Now encode the character C.  */
4573       if (c < 0x20 || c == 0x7F)
4574         {
               /* Control character.  A newline (or a carriage return
                  when the EOL type is Qmac) ends the line.  */
4575           if (c == '\n'
4576               || (c == '\r' && EQ (eol_type, Qmac)))
4577             {
4578               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4579                 ENCODE_RESET_PLANE_AND_REGISTER ();
4580               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4581                 {
4582                   int i;
4583
                       /* Restore every graphic register to its initial
                          designation.  */
4584                   for (i = 0; i < 4; i++)
4585                     CODING_ISO_DESIGNATION (coding, i)
4586                       = CODING_ISO_INITIAL (coding, i);
4587                 }
                   /* Designations for the next line are produced at the
                      top of the loop if the flag is set.  */
4588               bol_designation
4589                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4590             }
4591           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4592             ENCODE_RESET_PLANE_AND_REGISTER ();
4593           EMIT_ONE_ASCII_BYTE (c);
4594         }
4595       else if (ASCII_CHAR_P (c))
4596         {
               /* An ASCII-compatible coding system emits the byte as is;
                  otherwise ASCII goes through the regular ISO character
                  encoding.  */
4597           if (ascii_compatible)
4598             EMIT_ONE_ASCII_BYTE (c);
4599           else
4600             {
4601               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4602               ENCODE_ISO_CHARACTER (charset, c);
4603             }
4604         }
4605       else if (CHAR_BYTE8_P (c))
4606         {
               /* A raw 8-bit byte character is emitted as that byte.  */
4607           c = CHAR_TO_BYTE8 (c);
4608           EMIT_ONE_BYTE (c);
4609         }
4610       else
4611         {
4612           struct charset *charset;
4613
               /* Try the preferred charset first; fall back to searching
                  CHARSET_LIST when it does not contain C.  */
4614           if (preferred_charset_id >= 0)
4615             {
4616               charset = CHARSET_FROM_ID (preferred_charset_id);
4617               if (! CHAR_CHARSET_P (c, charset))
4618                 charset = char_charset (c, charset_list, NULL);
4619             }
4620           else
4621             charset = char_charset (c, charset_list, NULL);
4622           if (!charset)
4623             {
                   /* No charset can encode C: substitute either the
                      inhibit-substitution character (safe encoding mode)
                      or the coding system's default character.  */
4624               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4625                 {
4626                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4627                   charset = CHARSET_FROM_ID (charset_ascii);
4628                 }
4629               else
4630                 {
4631                   c = coding->default_char;
4632                   charset = char_charset (c, charset_list, NULL);
4633                 }
4634             }
4635           ENCODE_ISO_CHARACTER (charset, c);
4636         }
4637     }
4638
       /* At the end of the last block, reset the state as at an
          end-of-line if the coding system asks for that.  */
4639   if (coding->mode & CODING_MODE_LAST_BLOCK
4640       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4641     {
4642       ASSURE_DESTINATION (safe_room);
4643       ENCODE_RESET_PLANE_AND_REGISTER ();
4644     }
4645   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4646   CODING_ISO_BOL (coding) = bol_designation;
4647   coding->produced_char += produced_chars;
4648   coding->produced = dst - coding->destination;
4649   return 0;
4650 }
4651
4652 \f
4653 /*** 8. SJIS and BIG5 handlers ***/
4654
4655 /* Although SJIS and BIG5 are not ISO's coding system, they are used
4656    quite widely.  So, for the moment, Emacs supports them in the bare
4657    C code.  But, in the future, they may be supported only by CCL.  */
4658
4659 /* SJIS is a coding system encoding three character sets: ASCII, right
4660    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4661    as is.  A character of charset katakana-jisx0201 is encoded by
4662    "position-code + 0x80".  A character of charset japanese-jisx0208
4663    is encoded in two bytes, with its two position-codes divided and
4664    shifted so that they fit in the ranges below.
4665
4666    --- CODE RANGE of SJIS ---
4667    (character set)      (range)
4668    ASCII                0x00 .. 0x7F
4669    KATAKANA-JISX0201    0xA0 .. 0xDF
4670    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4671             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4672    -------------------------------
4673
4674 */
4675
4676 /* BIG5 is a coding system encoding two character sets: ASCII and
4677    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4678    character set and each of its characters is encoded in two bytes.
4679
4680    --- CODE RANGE of BIG5 ---
4681    (character set)      (range)
4682    ASCII                0x00 .. 0x7F
4683    Big5 (1st byte)      0xA1 .. 0xFE
4684         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4685    --------------------------
4686
4687   */
4688
4689 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4690    Check if a text is encoded in SJIS.  If the whole source is
4691    acceptable, add CATEGORY_MASK_SJIS to DETECT_INFO->found when at
        least one non-ASCII SJIS code was seen, and return 1.  Otherwise
        add CATEGORY_MASK_SJIS to DETECT_INFO->rejected and return 0.
        CATEGORY_MASK_SJIS is always added to DETECT_INFO->checked.  */
4692
4693 static int
4694 detect_coding_sjis (coding, detect_info)
4695      struct coding_system *coding;
4696      struct coding_detection_info *detect_info;
4697 {
4698   const unsigned char *src = coding->source, *src_base;
4699   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* MULTIBYTEP and CONSUMED_CHARS are not referenced by name below;
          presumably ONE_MORE_BYTE uses them (TODO confirm).  */
4700   int multibytep = coding->src_multibyte;
4701   int consumed_chars = 0;
4702   int found = 0;
4703   int c;
4704   Lisp_Object attrs, charset_list;
4705   int max_first_byte_of_2_byte_code;
4706
4707   CODING_GET_INFO (coding, attrs, charset_list);
       /* A charset list with more than three elements widens the range
          of accepted first bytes from 0xEF up to 0xFC.  */
4708   max_first_byte_of_2_byte_code
4709     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4710
4711   detect_info->checked |= CATEGORY_MASK_SJIS;
4712   /* A coding system of this category is always ASCII compatible.  */
4713   src += coding->head_ascii;
4714
4715   while (1)
4716     {
           /* Remember where this code starts so that the code at
              `no_more_source' can tell whether the source ended in the
              middle of a code.  */
4717       src_base = src;
4718       ONE_MORE_BYTE (c);
4719       if (c < 0x80)
4720         continue;
4721       if ((c >= 0x81 && c <= 0x9F)
4722           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4723         {
               /* First byte of a two-byte code; validate the second.  */
4724           ONE_MORE_BYTE (c);
4725           if (c < 0x40 || c == 0x7F || c > 0xFC)
4726             break;
4727           found = CATEGORY_MASK_SJIS;
4728         }
4729       else if (c >= 0xA0 && c < 0xE0)
             /* One-byte code in the range 0xA0..0xDF.  */
4730         found = CATEGORY_MASK_SJIS;
4731       else
4732         break;
4733     }
       /* Reached only by `break': an invalid byte was found.  */
4734   detect_info->rejected |= CATEGORY_MASK_SJIS;
4735   return 0;
4736
       /* No statement above jumps here explicitly; presumably
          ONE_MORE_BYTE does so when the source is exhausted (TODO
          confirm).  If SRC has advanced past SRC_BASE, the source ended
          inside a two-byte code, which is an error in the last block.  */
4737  no_more_source:
4738   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4739     {
4740       detect_info->rejected |= CATEGORY_MASK_SJIS;
4741       return 0;
4742     }
4743   detect_info->found |= found;
4744   return 1;
4745 }
4746
4747 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4748    Check if a text is encoded in BIG5.  If the whole source is
4749    acceptable, add CATEGORY_MASK_BIG5 to DETECT_INFO->found when at
        least one two-byte code was seen, and return 1.  Otherwise add
        CATEGORY_MASK_BIG5 to DETECT_INFO->rejected and return 0.
        CATEGORY_MASK_BIG5 is always added to DETECT_INFO->checked.  */
4750
4751 static int
4752 detect_coding_big5 (coding, detect_info)
4753      struct coding_system *coding;
4754      struct coding_detection_info *detect_info;
4755 {
4756   const unsigned char *src = coding->source, *src_base;
4757   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* MULTIBYTEP and CONSUMED_CHARS are not referenced by name below;
          presumably ONE_MORE_BYTE uses them (TODO confirm).  */
4758   int multibytep = coding->src_multibyte;
4759   int consumed_chars = 0;
4760   int found = 0;
4761   int c;
4762
4763   detect_info->checked |= CATEGORY_MASK_BIG5;
4764   /* A coding system of this category is always ASCII compatible.  */
4765   src += coding->head_ascii;
4766
4767   while (1)
4768     {
           /* Remember where this code starts so that the code at
              `no_more_source' can tell whether the source ended in the
              middle of a code.  */
4769       src_base = src;
4770       ONE_MORE_BYTE (c);
4771       if (c < 0x80)
4772         continue;
4773       if (c >= 0xA1)
4774         {
               /* First byte of a two-byte code; validate the second.
                  An invalid second byte must mark the category as
                  rejected, like every other failure in this function
                  and like the SJIS detector, so leave the loop instead
                  of returning without recording the rejection.  */
4775           ONE_MORE_BYTE (c);
4776           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4777             break;
4778           found = CATEGORY_MASK_BIG5;
4779         }
4780       else
4781         break;
4782     }
       /* Reached only by `break': an invalid byte was found.  */
4783   detect_info->rejected |= CATEGORY_MASK_BIG5;
4784   return 0;
4785
       /* No statement above jumps here explicitly; presumably
          ONE_MORE_BYTE does so when the source is exhausted (TODO
          confirm).  If SRC has advanced past SRC_BASE, the source ended
          inside a two-byte code, which is an error in the last block.  */
4786  no_more_source:
4787   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4788     {
4789       detect_info->rejected |= CATEGORY_MASK_BIG5;
4790       return 0;
4791     }
4792   detect_info->found |= found;
4793   return 1;
4794 }
4795
4796 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4797    Decode SJIS text from CODING->source, starting at offset
        CODING->consumed, into CODING->charbuf, starting at index
        CODING->charbuf_used.  An invalid byte is stored as a raw-byte
        character and counted in CODING->errors.  On exit
        CODING->consumed_char, CODING->consumed and CODING->charbuf_used
        are updated.  */
4798
4799 static void
4800 decode_coding_sjis (coding)
4801      struct coding_system *coding;
4802 {
4803   const unsigned char *src = coding->source + coding->consumed;
4804   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* Start of the code currently being decoded; only bytes before it
          are reported as consumed.  */
4805   const unsigned char *src_base;
4806   int *charbuf = coding->charbuf + coding->charbuf_used;
4807   /* We may produce one charset annotation in one loop and one more at
4808      the end.  */
4809   int *charbuf_end
4810     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4811   int consumed_chars = 0, consumed_chars_base;
4812   int multibytep = coding->src_multibyte;
4813   struct charset *charset_roman, *charset_kanji, *charset_kana;
4814   struct charset *charset_kanji2;
4815   Lisp_Object attrs, charset_list, val;
       /* LAST_ID is the charset of the current run of characters and
          LAST_OFFSET the character offset at which that run started;
          they are used to emit charset annotations.  */
4816   int char_offset = coding->produced_char;
4817   int last_offset = char_offset;
4818   int last_id = charset_ascii;
4819   int eol_crlf =
4820     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte read ahead after a CR when EOL_CRLF is set, or -1.  */
4821   int byte_after_cr = -1;
4822
4823   CODING_GET_INFO (coding, attrs, charset_list);
4824
       /* The charset list supplies, in order, the roman, kana and kanji
          charsets, optionally followed by a second kanji charset.  */
4825   val = charset_list;
4826   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4827   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4828   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4829   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4830
4831   while (1)
4832     {
4833       int c, c1;
4834       struct charset *charset;
4835
4836       src_base = src;
4837       consumed_chars_base = consumed_chars;
4838
4839       if (charbuf >= charbuf_end)
4840         {
               /* The buffer is full.  Un-consume a pending read-ahead
                  byte so that it is read again on the next call.  */
4841           if (byte_after_cr >= 0)
4842             src_base--;
4843           break;
4844         }
4845
4846       if (byte_after_cr >= 0)
4847         c = byte_after_cr, byte_after_cr = -1;
4848       else
4849         ONE_MORE_BYTE (c);
4850       if (c < 0)
4851         goto invalid_code;
4852       if (c < 0x80)
4853         {
               /* With DOS end-of-line, read the byte following a CR
                  ahead of time; it is handled on the next iteration.  */
4854           if (eol_crlf && c == '\r')
4855             ONE_MORE_BYTE (byte_after_cr);
4856           charset = charset_roman;
4857         }
4858       else if (c == 0x80 || c == 0xA0)
4859         goto invalid_code;
4860       else if (c >= 0xA1 && c <= 0xDF)
4861         {
4862           /* SJIS -> JISX0201-Kana */
4863           c &= 0x7F;
4864           charset = charset_kana;
4865         }
4866       else if (c <= 0xEF)
4867         {
4868           /* SJIS -> JISX0208 */
4869           ONE_MORE_BYTE (c1);
4870           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4871             goto invalid_code;
4872           c = (c << 8) | c1;
4873           SJIS_TO_JIS (c);
4874           charset = charset_kanji;
4875         }
4876       else if (c <= 0xFC && charset_kanji2)
4877         {
4878           /* SJIS -> JISX0213-2 */
4879           ONE_MORE_BYTE (c1);
4880           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4881             goto invalid_code;
4882           c = (c << 8) | c1;
4883           SJIS_TO_JIS2 (c);
4884           charset = charset_kanji2;
4885         }
4886       else
4887         goto invalid_code;
           /* When a non-ASCII charset differs from that of the current
              run, close the run with an annotation (unless the run was
              ASCII) and start a new one.  */
4888       if (charset->id != charset_ascii
4889           && last_id != charset->id)
4890         {
4891           if (last_id != charset_ascii)
4892             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4893           last_id = charset->id;
4894           last_offset = char_offset;
4895         }
4896       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4897       *charbuf++ = c;
4898       char_offset++;
4899       continue;
4900
4901     invalid_code:
           /* Rewind to the start of the offending code, read a single
              byte again, and store it as a raw-byte character (a
              negative value from ONE_MORE_BYTE is stored negated).  */
4902       src = src_base;
4903       consumed_chars = consumed_chars_base;
4904       ONE_MORE_BYTE (c);
4905       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4906       char_offset++;
4907       coding->errors++;
4908     }
4909
       /* Reached by falling out of the loop when CHARBUF is full;
          presumably also the target of ONE_MORE_BYTE when the source is
          exhausted (TODO confirm).  Bytes from SRC_BASE on are left
          unconsumed.  */
4910  no_more_source:
4911   if (last_id != charset_ascii)
4912     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4913   coding->consumed_char += consumed_chars_base;
4914   coding->consumed = src_base - coding->source;
4915   coding->charbuf_used = charbuf - coding->charbuf;
4916 }
4917
4918 static void
4919 decode_coding_big5 (coding)
4920      struct coding_system *coding;
4921 {
       /* Decode BIG5 text from CODING->source, starting at offset
          CODING->consumed, into CODING->charbuf, starting at index
          CODING->charbuf_used.  An invalid byte is stored as a raw-byte
          character and counted in CODING->errors.  On exit
          CODING->consumed_char, CODING->consumed and
          CODING->charbuf_used are updated.  */
4922   const unsigned char *src = coding->source + coding->consumed;
4923   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* Start of the code currently being decoded; only bytes before it
          are reported as consumed.  */
4924   const unsigned char *src_base;
4925   int *charbuf = coding->charbuf + coding->charbuf_used;
4926   /* We may produce one charset annotation in one loop and one more at
4927      the end.  */
4928   int *charbuf_end
4929     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4930   int consumed_chars = 0, consumed_chars_base;
4931   int multibytep = coding->src_multibyte;
4932   struct charset *charset_roman, *charset_big5;
4933   Lisp_Object attrs, charset_list, val;
       /* LAST_ID is the charset of the current run of characters and
          LAST_OFFSET the character offset at which that run started;
          they are used to emit charset annotations.  */
4934   int char_offset = coding->produced_char;
4935   int last_offset = char_offset;
4936   int last_id = charset_ascii;
4937   int eol_crlf =
4938     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* Byte read ahead after a CR when EOL_CRLF is set, or -1.  */
4939   int byte_after_cr = -1;
4940
4941   CODING_GET_INFO (coding, attrs, charset_list);
       /* The charset list supplies the roman charset followed by the
          Big5 charset.  */
4942   val = charset_list;
4943   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4944   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4945
4946   while (1)
4947     {
4948       int c, c1;
4949       struct charset *charset;
4950
4951       src_base = src;
4952       consumed_chars_base = consumed_chars;
4953
4954       if (charbuf >= charbuf_end)
4955         {
               /* The buffer is full.  Un-consume a pending read-ahead
                  byte so that it is read again on the next call.  */
4956           if (byte_after_cr >= 0)
4957             src_base--;
4958           break;
4959         }
4960
4961       if (byte_after_cr >= 0)
4962         c = byte_after_cr, byte_after_cr = -1;
4963       else
4964         ONE_MORE_BYTE (c);
4965
4966       if (c < 0)
4967         goto invalid_code;
4968       if (c < 0x80)
4969         {
               /* With DOS end-of-line, read the byte following a CR
                  ahead of time; it is handled on the next iteration.  */
4970           if (eol_crlf && c == '\r')
4971             ONE_MORE_BYTE (byte_after_cr);
4972           charset = charset_roman;
4973         }
4974       else
4975         {
4976           /* BIG5 -> Big5 */
               /* First byte must be 0xA1..0xFE; second byte must be
                  0x40..0x7E or 0xA1..0xFE.  */
4977           if (c < 0xA1 || c > 0xFE)
4978             goto invalid_code;
4979           ONE_MORE_BYTE (c1);
4980           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4981             goto invalid_code;
4982           c = c << 8 | c1;
4983           charset = charset_big5;
4984         }
           /* When a non-ASCII charset differs from that of the current
              run, close the run with an annotation (unless the run was
              ASCII) and start a new one.  */
4985       if (charset->id != charset_ascii
4986           && last_id != charset->id)
4987         {
4988           if (last_id != charset_ascii)
4989             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4990           last_id = charset->id;
4991           last_offset = char_offset;
4992         }
4993       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4994       *charbuf++ = c;
4995       char_offset++;
4996       continue;
4997
4998     invalid_code:
           /* Rewind to the start of the offending code, read a single
              byte again, and store it as a raw-byte character (a
              negative value from ONE_MORE_BYTE is stored negated).  */
4999       src = src_base;
5000       consumed_chars = consumed_chars_base;
5001       ONE_MORE_BYTE (c);
5002       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
5003       char_offset++;
5004       coding->errors++;
5005     }
5006
       /* Reached by falling out of the loop when CHARBUF is full;
          presumably also the target of ONE_MORE_BYTE when the source is
          exhausted (TODO confirm).  Bytes from SRC_BASE on are left
          unconsumed.  */
5007  no_more_source:
5008   if (last_id != charset_ascii)
5009     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5010   coding->consumed_char += consumed_chars_base;
5011   coding->consumed = src_base - coding->source;
5012   coding->charbuf_used = charbuf - coding->charbuf;
5013 }
5014
5015 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
5016    Encode the characters in CODING->charbuf as SJIS and store the
5017    bytes in CODING->destination starting at offset CODING->produced.
5018    The charset list of CODING supplies, in order, the roman, kana and
5019    kanji charsets, optionally followed by a second kanji charset.  A
5020    character whose charset is none of the kana and kanji charsets is
5021    emitted as one byte holding the low 7 bits of its code.  */
5022
5023 static int
5024 encode_coding_sjis (coding)
5025      struct coding_system *coding;
5026 {
5027   int multibytep = coding->dst_multibyte;
5028   int *charbuf = coding->charbuf;
5029   int *charbuf_end = charbuf + coding->charbuf_used;
5030   unsigned char *dst = coding->destination + coding->produced;
5031   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Amount passed to ASSURE_DESTINATION before each character.  */
5032   int safe_room = 4;
5033   int produced_chars = 0;
5034   Lisp_Object attrs, charset_list, val;
5035   int ascii_compatible;
5036   struct charset *charset_roman, *charset_kanji, *charset_kana;
5037   struct charset *charset_kanji2;
5038   int c;
5039
5040   CODING_GET_INFO (coding, attrs, charset_list);
5041   val = charset_list;
5042   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5043   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5044   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5045   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
5046
5047   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5048
5049   while (charbuf < charbuf_end)
5050     {
5051       ASSURE_DESTINATION (safe_room);
5052       c = *charbuf++;
5053       /* Now encode the character C.  */
5054       if (ASCII_CHAR_P (c) && ascii_compatible)
5055         EMIT_ONE_ASCII_BYTE (c);
5056       else if (CHAR_BYTE8_P (c))
5057         {
               /* A raw 8-bit byte character is emitted as that byte.  */
5058           c = CHAR_TO_BYTE8 (c);
5059           EMIT_ONE_BYTE (c);
5060         }
5061       else
5062         {
5063           unsigned code;
5064           struct charset *charset = char_charset (c, charset_list, &code);
5065
5066           if (!charset)
5067             {
                   /* No charset in the list contains C: substitute
                      either the inhibit-substitution code (safe encoding
                      mode) or the coding system's default character.  */
5068               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5069                 {
5070                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5071                   charset = CHARSET_FROM_ID (charset_ascii);
5072                 }
5073               else
5074                 {
5075                   c = coding->default_char;
5076                   charset = char_charset (c, charset_list, &code);
5077                 }
5078             }
5079           if (code == CHARSET_INVALID_CODE (charset))
5080             abort ();
5081           if (charset == charset_kanji)
5082             {
5083               int c1, c2;
5084               JIS_TO_SJIS (code);
5085               c1 = code >> 8, c2 = code & 0xFF;
5086               EMIT_TWO_BYTES (c1, c2);
5087             }
5088           else if (charset == charset_kana)
5089             EMIT_ONE_BYTE (code | 0x80);
5090           else if (charset_kanji2 && charset == charset_kanji2)
5091             {
5092               int c1, c2;
5093
                   /* Only codes whose high byte is in one of the ranges
                      tested below are converted with JIS_TO_SJIS2; any
                      other code is emitted as the low 7 bits of CODE.  */
5094               c1 = code >> 8;
5095               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5096                   || c1 == 0x28
5097                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5098                 {
5099                   JIS_TO_SJIS2 (code);
5100                   c1 = code >> 8, c2 = code & 0xFF;
5101                   EMIT_TWO_BYTES (c1, c2);
5102                 }
5103               else
5104                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5105             }
5106           else
5107             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5108         }
5109     }
5110   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5111   coding->produced_char += produced_chars;
5112   coding->produced = dst - coding->destination;
5113   return 0;
5114 }
5115
5116 static int
5117 encode_coding_big5 (coding)
5118      struct coding_system *coding;
5119 {
       /* Encode the characters in CODING->charbuf as BIG5 and store the
          bytes in CODING->destination starting at offset
          CODING->produced.  The charset list of CODING supplies the
          roman charset followed by the Big5 charset.  A character whose
          charset is not the Big5 charset is emitted as one byte holding
          the low 7 bits of its code.  Returns 0 on the paths visible
          here.  */
5120   int multibytep = coding->dst_multibyte;
5121   int *charbuf = coding->charbuf;
5122   int *charbuf_end = charbuf + coding->charbuf_used;
5123   unsigned char *dst = coding->destination + coding->produced;
5124   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Amount passed to ASSURE_DESTINATION before each character.  */
5125   int safe_room = 4;
5126   int produced_chars = 0;
5127   Lisp_Object attrs, charset_list, val;
5128   int ascii_compatible;
5129   struct charset *charset_roman, *charset_big5;
5130   int c;
5131
5132   CODING_GET_INFO (coding, attrs, charset_list);
5133   val = charset_list;
5134   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5135   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5136   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5137
5138   while (charbuf < charbuf_end)
5139     {
5140       ASSURE_DESTINATION (safe_room);
5141       c = *charbuf++;
5142       /* Now encode the character C.  */
5143       if (ASCII_CHAR_P (c) && ascii_compatible)
5144         EMIT_ONE_ASCII_BYTE (c);
5145       else if (CHAR_BYTE8_P (c))
5146         {
               /* A raw 8-bit byte character is emitted as that byte.  */
5147           c = CHAR_TO_BYTE8 (c);
5148           EMIT_ONE_BYTE (c);
5149         }
5150       else
5151         {
5152           unsigned code;
5153           struct charset *charset = char_charset (c, charset_list, &code);
5154
5155           if (! charset)
5156             {
                   /* No charset in the list contains C: substitute
                      either the inhibit-substitution code (safe encoding
                      mode) or the coding system's default character.  */
5157               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5158                 {
5159                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5160                   charset = CHARSET_FROM_ID (charset_ascii);
5161                 }
5162               else
5163                 {
5164                   c = coding->default_char;
5165                   charset = char_charset (c, charset_list, &code);
5166                 }
5167             }
5168           if (code == CHARSET_INVALID_CODE (charset))
5169             abort ();
5170           if (charset == charset_big5)
5171             {
5172               int c1, c2;
5173
                   /* The code's high byte is the first output byte and
                      its low byte the second.  */
5174               c1 = code >> 8, c2 = code & 0xFF;
5175               EMIT_TWO_BYTES (c1, c2);
5176             }
5177           else
5178             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5179         }
5180     }
5181   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5182   coding->produced_char += produced_chars;
5183   coding->produced = dst - coding->destination;
5184   return 0;
5185 }
5186
5187 \f
5188 /*** 9. CCL handlers ***/
5189
5190 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5191    Check if a text is encoded in a coding system of which
5192    encoder/decoder are written in CCL program.  If every source byte
5193    is marked valid, add CATEGORY_MASK_CCL to DETECT_INFO->found when
        some byte had a validity value greater than 1, and return 1.
        Otherwise add CATEGORY_MASK_CCL to DETECT_INFO->rejected and
        return 0.  CATEGORY_MASK_CCL is always added to
        DETECT_INFO->checked.  */
5194
5195 static int
5196 detect_coding_ccl (coding, detect_info)
5197      struct coding_system *coding;
5198      struct coding_detection_info *detect_info;
5199 {
5200   const unsigned char *src = coding->source, *src_base;
5201   const unsigned char *src_end = coding->source + coding->src_bytes;
       /* MULTIBYTEP and CONSUMED_CHARS are not referenced by name below;
          presumably ONE_MORE_BYTE uses them (TODO confirm).  */
5202   int multibytep = coding->src_multibyte;
5203   int consumed_chars = 0;
5204   int found = 0;
5205   unsigned char *valids;
       /* Saved before CODING is redirected below.  */
5206   int head_ascii = coding->head_ascii;
5207   Lisp_Object attrs;
5208
5209   detect_info->checked |= CATEGORY_MASK_CCL;
5210
       /* From here on CODING refers to the coding system registered for
          the CCL category, not to the argument; the source pointers
          above were taken from the argument.  */
5211   coding = &coding_categories[coding_category_ccl];
5212   valids = CODING_CCL_VALIDS (coding);
5213   attrs = CODING_ID_ATTRS (coding->id);
       /* Skip the leading ASCII bytes only when that coding system is
          ASCII compatible.  */
5214   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5215     src += head_ascii;
5216
5217   while (1)
5218     {
5219       int c;
5220
5221       src_base = src;
5222       ONE_MORE_BYTE (c);
           /* A negative value or a byte whose VALIDS entry is zero
              rejects the category.  */
5223       if (c < 0 || ! valids[c])
5224         break;
5225       if ((valids[c] > 1))
5226         found = CATEGORY_MASK_CCL;
5227     }
5228   detect_info->rejected |= CATEGORY_MASK_CCL;
5229   return 0;
5230
       /* No statement above jumps here explicitly; presumably
          ONE_MORE_BYTE does so when the source is exhausted (TODO
          confirm).  */
5231  no_more_source:
5232   detect_info->found |= found;
5233   return 1;
5234 }
5235
5236 static void
5237 decode_coding_ccl (coding)
5238      struct coding_system *coding;
5239 {
       /* Decode CODING->source, starting at offset CODING->consumed, by
          running the CCL program in CODING->spec.ccl, and store the
          result in CODING->charbuf starting at index
          CODING->charbuf_used.  The source is handed to the CCL driver
          in chunks of at most 1024 elements.  The conversion result is
          recorded from the final CCL status, and CODING->consumed_char,
          CODING->consumed and CODING->charbuf_used are updated.  */
5240   const unsigned char *src = coding->source + coding->consumed;
5241   const unsigned char *src_end = coding->source + coding->src_bytes;
5242   int *charbuf = coding->charbuf + coding->charbuf_used;
5243   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5244   int consumed_chars = 0;
5245   int multibytep = coding->src_multibyte;
5246   struct ccl_program *ccl = &coding->spec.ccl->ccl;
       /* One chunk of source elements for the CCL driver and, for a
          multibyte source, the byte offset from SRC at which each of
          those elements starts.  */
5247   int source_charbuf[1024];
5248   int source_byteidx[1024];
5249   Lisp_Object attrs, charset_list;
5250
5251   CODING_GET_INFO (coding, attrs, charset_list);
5252
5253   while (1)
5254     {
5255       const unsigned char *p = src;
5256       int i = 0;
5257
           /* Fill the chunk: characters for a multibyte source, raw
              bytes otherwise.  Afterwards P points just past the last
              source byte placed in the chunk.  */
5258       if (multibytep)
5259         while (i < 1024 && p < src_end)
5260           {
5261             source_byteidx[i] = p - src;
5262             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5263           }
5264       else
5265         while (i < 1024 && p < src_end)
5266           source_charbuf[i++] = *p++;
5267
5268       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5269         ccl->last_block = 1;
5270       ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5271                   charset_list);
5272       charbuf += ccl->produced;
           /* Advance SRC by the number of BYTES consumed.  For a
              multibyte source the consumed count is in characters, so it
              must be mapped to a byte offset: SOURCE_BYTEIDX gives the
              offset of the first unconsumed character, and when the
              whole chunk was consumed (no such entry exists) the offset
              is the distance to P.  Adding the character count directly
              would leave SRC in the wrong place whenever the chunk
              contains a multi-byte character.  */
5273       if (multibytep)
5274         src += (ccl->consumed < i ? source_byteidx[ccl->consumed] : p - src);
5275       else
5276         src += ccl->consumed;
5277       consumed_chars += ccl->consumed;
           /* Continue only when the driver stopped for lack of input
              and more source remains beyond this chunk.  */
5278       if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5279         break;
5280     }
5281
5282   switch (ccl->status)
5283     {
5284     case CCL_STAT_SUSPEND_BY_SRC:
5285       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5286       break;
5287     case CCL_STAT_SUSPEND_BY_DST:
5288       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5289       break;
5290     case CCL_STAT_QUIT:
5291     case CCL_STAT_INVALID_CMD:
5292       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5293       break;
5294     default:
5295       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5296       break;
5297     }
5298   coding->consumed_char += consumed_chars;
5299   coding->consumed = src - coding->source;
5300   coding->charbuf_used = charbuf - coding->charbuf;
5301 }
5302
5303 static int
5304 encode_coding_ccl (coding)
5305      struct coding_system *coding;
5306 {
       /* Encode the characters in CODING->charbuf by running the CCL
          encoder program of CODING, and store the low 8 bits of each
          produced element in CODING->destination starting at offset
          CODING->produced.  The conversion result is recorded from the
          final CCL status, and CODING->produced_char and
          CODING->produced are updated.  Returns 0 on the paths visible
          here.  */
5307   struct ccl_program ccl;
5308   int multibytep = coding->dst_multibyte;
5309   int *charbuf = coding->charbuf;
5310   int *charbuf_end = charbuf + coding->charbuf_used;
5311   unsigned char *dst = coding->destination + coding->produced;
5312   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Output area handed to the CCL driver for each run.  */
5313   int destination_charbuf[1024];
5314   int i, produced_chars = 0;
5315   Lisp_Object attrs, charset_list;
5316
5317   CODING_GET_INFO (coding, attrs, charset_list);
5318   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
5319
5320   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
5321   ccl.dst_multibyte = coding->dst_multibyte;
5322
5323   while (charbuf < charbuf_end)
5324     {
5325       ccl_driver (&ccl, charbuf, destination_charbuf,
5326                   charbuf_end - charbuf, 1024, charset_list);
5327       if (multibytep)
5328         {
               /* Twice the produced count is requested here; presumably
                  EMIT_ONE_BYTE may store up to two bytes per element in
                  a multibyte destination (TODO confirm).  */
5329           ASSURE_DESTINATION (ccl.produced * 2);
5330           for (i = 0; i < ccl.produced; i++)
5331             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5332         }
5333       else
5334         {
               /* Unibyte destination: one byte per produced element,
                  counted directly in PRODUCED_CHARS.  */
5335           ASSURE_DESTINATION (ccl.produced);
5336           for (i = 0; i < ccl.produced; i++)
5337             *dst++ = destination_charbuf[i] & 0xFF;
5338           produced_chars += ccl.produced;
5339         }
5340       charbuf += ccl.consumed;
5341       if (ccl.status == CCL_STAT_QUIT
5342           || ccl.status == CCL_STAT_INVALID_CMD)
5343         break;
5344     }
5345
5346   switch (ccl.status)
5347     {
5348     case CCL_STAT_SUSPEND_BY_SRC:
5349       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5350       break;
5351     case CCL_STAT_SUSPEND_BY_DST:
5352       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5353       break;
5354     case CCL_STAT_QUIT:
5355     case CCL_STAT_INVALID_CMD:
5356       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5357       break;
5358     default:
5359       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5360       break;
5361     }
5362
5363   coding->produced_char += produced_chars;
5364   coding->produced = dst - coding->destination;
5365   return 0;
5366 }
5367
5368
5369 \f
5370 /*** 10, 11. no-conversion handlers ***/
5371
5372 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Mark the whole source of CODING as consumed without copying it
        (CODING->chars_at_source is set to 1).  When DOS end-of-line
        conversion is in effect and the source ends with a CR, leave
        that CR unconsumed and record CODING_RESULT_INSUFFICIENT_SRC;
        otherwise record CODING_RESULT_SUCCESS.  */
5373
5374 static void
5375 decode_coding_raw_text (coding)
5376      struct coding_system *coding;
5377 {
5378   int eol_crlf =
5379     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5380
5381   coding->chars_at_source = 1;
5382   coding->consumed_char = coding->src_chars;
5383   coding->consumed = coding->src_bytes;
       /* Look at the last source byte only when there is one: with an
          empty source, index SRC_BYTES - 1 would be before the start of
          the source.  */
5384   if (eol_crlf && coding->src_bytes > 0
           && coding->source[coding->src_bytes - 1] == '\r')
5385     {
5386       coding->consumed_char--;
5387       coding->consumed--;
5388       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5389     }
5390   else
5391     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5392 }
5393
5394 static int
5395 encode_coding_raw_text (coding)
5396      struct coding_system *coding;
5397 {
       /* Copy the elements of CODING->charbuf to CODING->destination,
          starting at offset CODING->produced, without charset
          conversion.  The four cases below are selected by whether the
          destination (CODING->dst_multibyte) and the source
          (CODING->src_multibyte) are multibyte.  Records
          CODING_RESULT_SUCCESS, updates CODING->produced_char and
          CODING->produced, and returns 0 on the paths visible here.  */
5398   int multibytep = coding->dst_multibyte;
5399   int *charbuf = coding->charbuf;
5400   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5401   unsigned char *dst = coding->destination + coding->produced;
5402   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5403   int produced_chars = 0;
5404   int c;
5405
5406   if (multibytep)
5407     {
5408       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5409
5410       if (coding->src_multibyte)
5411         while (charbuf < charbuf_end)
5412           {
5413             ASSURE_DESTINATION (safe_room);
5414             c = *charbuf++;
5415             if (ASCII_CHAR_P (c))
5416               EMIT_ONE_ASCII_BYTE (c);
5417             else if (CHAR_BYTE8_P (c))
5418               {
5419                 c = CHAR_TO_BYTE8 (c);
5420                 EMIT_ONE_BYTE (c);
5421               }
5422             else
5423               {
                     /* Expand the character into its multibyte form in
                        STR and emit each of those bytes in turn.  */
5424                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5425
5426                 CHAR_STRING_ADVANCE (c, p1);
5427                 while (p0 < p1)
5428                   {
5429                     EMIT_ONE_BYTE (*p0);
5430                     p0++;
5431                   }
5432               }
5433           }
5434       else
             /* Unibyte source: every element is emitted as one byte.  */
5435         while (charbuf < charbuf_end)
5436           {
5437             ASSURE_DESTINATION (safe_room);
5438             c = *charbuf++;
5439             EMIT_ONE_BYTE (c);
5440           }
5441     }
5442   else
5443     {
5444       if (coding->src_multibyte)
5445         {
5446           int safe_room = MAX_MULTIBYTE_LENGTH;
5447
5448           while (charbuf < charbuf_end)
5449             {
5450               ASSURE_DESTINATION (safe_room);
5451               c = *charbuf++;
                   /* ASCII and raw-byte characters become one byte; any
                      other character is stored in its multibyte form.  */
5452               if (ASCII_CHAR_P (c))
5453                 *dst++ = c;
5454               else if (CHAR_BYTE8_P (c))
5455                 *dst++ = CHAR_TO_BYTE8 (c);
5456               else
5457                 CHAR_STRING_ADVANCE (c, dst);
5458             }
5459         }
5460       else
5461         {
               /* Unibyte to unibyte: request room for all remaining
                  elements once, then copy them, never writing at or
                  beyond DST_END.  */
5462           ASSURE_DESTINATION (charbuf_end - charbuf);
5463           while (charbuf < charbuf_end && dst < dst_end)
5464             *dst++ = *charbuf++;
5465         }
           /* For a unibyte destination the character count is the
              number of bytes stored by this call.  */
5466       produced_chars = dst - (coding->destination + coding->produced);
5467     }
5468   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5469   coding->produced_char += produced_chars;
5470   coding->produced = dst - coding->destination;
5471   return 0;
5472 }
5473
5474 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5475    Check if a text is encoded in a charset-based coding system.  If it
5476    is, return 1, else return 0.  */
5477
5478 static int
5479 detect_coding_charset (coding, detect_info)
5480      struct coding_system *coding;
5481      struct coding_detection_info *detect_info;
5482 {
5483   const unsigned char *src = coding->source, *src_base;
5484   const unsigned char *src_end = coding->source + coding->src_bytes;
5485   int multibytep = coding->src_multibyte;
5486   int consumed_chars = 0;
5487   Lisp_Object attrs, valids, name;
5488   int found = 0;
5489   int head_ascii = coding->head_ascii;
5490   int check_latin_extra = 0;
5491
5492   detect_info->checked |= CATEGORY_MASK_CHARSET;
5493
5494   coding = &coding_categories[coding_category_charset];
5495   attrs = CODING_ID_ATTRS (coding->id);
5496   valids = AREF (attrs, coding_attr_charset_valids);
5497   name = CODING_ID_NAME (coding->id);
5498   if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5499                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5500       || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5501                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5502     check_latin_extra = 1;
5503
5504   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5505     src += head_ascii;
5506
5507   while (1)
5508     {
5509       int c;
5510       Lisp_Object val;
5511       struct charset *charset;
5512       int dim, idx;
5513
5514       src_base = src;
5515       ONE_MORE_BYTE (c);
5516       if (c < 0)
5517         continue;
5518       val = AREF (valids, c);
5519       if (NILP (val))
5520         break;
5521       if (c >= 0x80)
5522         {
5523           if (c < 0xA0
5524               && check_latin_extra
5525               && (!VECTORP (Vlatin_extra_code_table)
5526                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5527             break;
5528           found = CATEGORY_MASK_CHARSET;
5529         }
5530       if (INTEGERP (val))
5531         {
5532           charset = CHARSET_FROM_ID (XFASTINT (val));
5533           dim = CHARSET_DIMENSION (charset);
5534           for (idx = 1; idx < dim; idx++)
5535             {
5536               if (src == src_end)
5537                 goto too_short;
5538               ONE_MORE_BYTE (c);
5539               if (c < charset->code_space[(dim - 1 - idx) * 2]
5540                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5541                 break;
5542             }
5543           if (idx < dim)
5544             break;
5545         }
5546       else
5547         {
5548           idx = 1;
5549           for (; CONSP (val); val = XCDR (val))
5550             {
5551               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5552               dim = CHARSET_DIMENSION (charset);
5553               while (idx < dim)
5554                 {
5555                   if (src == src_end)
5556                     goto too_short;
5557                   ONE_MORE_BYTE (c);
5558                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5559                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5560                     break;
5561                   idx++;
5562                 }
5563               if (idx == dim)
5564                 {
5565                   val = Qnil;
5566                   break;
5567                 }
5568             }
5569           if (CONSP (val))
5570             break;
5571         }
5572     }
5573  too_short:
5574   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5575   return 0;
5576
5577  no_more_source:
5578   detect_info->found |= found;
5579   return 1;
5580 }
5581
5582 static void
5583 decode_coding_charset (coding)
5584      struct coding_system *coding;
5585 {
5586   const unsigned char *src = coding->source + coding->consumed;
5587   const unsigned char *src_end = coding->source + coding->src_bytes;
5588   const unsigned char *src_base;
5589   int *charbuf = coding->charbuf + coding->charbuf_used;
5590   /* We may produce one charset annocation in one loop and one more at
5591      the end.  */
5592   int *charbuf_end
5593     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5594   int consumed_chars = 0, consumed_chars_base;
5595   int multibytep = coding->src_multibyte;
5596   Lisp_Object attrs, charset_list, valids;
5597   int char_offset = coding->produced_char;
5598   int last_offset = char_offset;
5599   int last_id = charset_ascii;
5600   int eol_crlf =
5601     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5602   int byte_after_cr = -1;
5603
5604   CODING_GET_INFO (coding, attrs, charset_list);
5605   valids = AREF (attrs, coding_attr_charset_valids);
5606
5607   while (1)
5608     {
5609       int c;
5610       Lisp_Object val;
5611       struct charset *charset;
5612       int dim;
5613       int len = 1;
5614       unsigned code;
5615
5616       src_base = src;
5617       consumed_chars_base = consumed_chars;
5618
5619       if (charbuf >= charbuf_end)
5620         {
5621           if (byte_after_cr >= 0)
5622             src_base--;
5623           break;
5624         }
5625
5626       if (byte_after_cr >= 0)
5627         {
5628           c = byte_after_cr;
5629           byte_after_cr = -1;
5630         }
5631       else
5632         {
5633           ONE_MORE_BYTE (c);
5634           if (eol_crlf && c == '\r')
5635             ONE_MORE_BYTE (byte_after_cr);
5636         }
5637       if (c < 0)
5638         goto invalid_code;
5639       code = c;
5640
5641       val = AREF (valids, c);
5642       if (! INTEGERP (val) && ! CONSP (val))
5643         goto invalid_code;
5644       if (INTEGERP (val))
5645         {
5646           charset = CHARSET_FROM_ID (XFASTINT (val));
5647           dim = CHARSET_DIMENSION (charset);
5648           while (len < dim)
5649             {
5650               ONE_MORE_BYTE (c);
5651               code = (code << 8) | c;
5652               len++;
5653             }
5654           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5655                               charset, code, c);
5656         }
5657       else
5658         {
5659           /* VAL is a list of charset IDs.  It is assured that the
5660              list is sorted by charset dimensions (smaller one
5661              comes first).  */
5662           while (CONSP (val))
5663             {
5664               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5665               dim = CHARSET_DIMENSION (charset);
5666               while (len < dim)
5667                 {
5668                   ONE_MORE_BYTE (c);
5669                   code = (code << 8) | c;
5670                   len++;
5671                 }
5672               CODING_DECODE_CHAR (coding, src, src_base,
5673                                   src_end, charset, code, c);
5674               if (c >= 0)
5675                 break;
5676               val = XCDR (val);
5677             }
5678         }
5679       if (c < 0)
5680         goto invalid_code;
5681       if (charset->id != charset_ascii
5682           && last_id != charset->id)
5683         {
5684           if (last_id != charset_ascii)
5685             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5686           last_id = charset->id;
5687           last_offset = char_offset;
5688         }
5689
5690       *charbuf++ = c;
5691       char_offset++;
5692       continue;
5693
5694     invalid_code:
5695       src = src_base;
5696       consumed_chars = consumed_chars_base;
5697       ONE_MORE_BYTE (c);
5698       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5699       char_offset++;
5700       coding->errors++;
5701     }
5702
5703  no_more_source:
5704   if (last_id != charset_ascii)
5705     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5706   coding->consumed_char += consumed_chars_base;
5707   coding->consumed = src_base - coding->source;
5708   coding->charbuf_used = charbuf - coding->charbuf;
5709 }
5710
5711 static int
5712 encode_coding_charset (coding)
5713      struct coding_system *coding;
5714 {
5715   int multibytep = coding->dst_multibyte;
5716   int *charbuf = coding->charbuf;
5717   int *charbuf_end = charbuf + coding->charbuf_used;
5718   unsigned char *dst = coding->destination + coding->produced;
5719   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5720   int safe_room = MAX_MULTIBYTE_LENGTH;
5721   int produced_chars = 0;
5722   Lisp_Object attrs, charset_list;
5723   int ascii_compatible;
5724   int c;
5725
5726   CODING_GET_INFO (coding, attrs, charset_list);
5727   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5728
5729   while (charbuf < charbuf_end)
5730     {
5731       struct charset *charset;
5732       unsigned code;
5733
5734       ASSURE_DESTINATION (safe_room);
5735       c = *charbuf++;
5736       if (ascii_compatible && ASCII_CHAR_P (c))
5737         EMIT_ONE_ASCII_BYTE (c);
5738       else if (CHAR_BYTE8_P (c))
5739         {
5740           c = CHAR_TO_BYTE8 (c);
5741           EMIT_ONE_BYTE (c);
5742         }
5743       else
5744         {
5745           charset = char_charset (c, charset_list, &code);
5746           if (charset)
5747             {
5748               if (CHARSET_DIMENSION (charset) == 1)
5749                 EMIT_ONE_BYTE (code);
5750               else if (CHARSET_DIMENSION (charset) == 2)
5751                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5752               else if (CHARSET_DIMENSION (charset) == 3)
5753                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5754               else
5755                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5756                                  (code >> 8) & 0xFF, code & 0xFF);
5757             }
5758           else
5759             {
5760               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5761                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5762               else
5763                 c = coding->default_char;
5764               EMIT_ONE_BYTE (c);
5765             }
5766         }
5767     }
5768
5769   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5770   coding->produced_char += produced_chars;
5771   coding->produced = dst - coding->destination;
5772   return 0;
5773 }
5774
5775 \f
5776 /*** 7. C library functions ***/
5777
5778 /* Setup coding context CODING from information about CODING_SYSTEM.
5779    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5780    CODING_SYSTEM is invalid, signal an error.  */
5781
5782 void
5783 setup_coding_system (coding_system, coding)
5784      Lisp_Object coding_system;
5785      struct coding_system *coding;
5786 {
5787   Lisp_Object attrs;
5788   Lisp_Object eol_type;
5789   Lisp_Object coding_type;
5790   Lisp_Object val;
5791
5792   if (NILP (coding_system))
5793     coding_system = Qundecided;
5794
5795   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5796
5797   attrs = CODING_ID_ATTRS (coding->id);
5798   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5799
5800   coding->mode = 0;
5801   coding->head_ascii = -1;
5802   if (VECTORP (eol_type))
5803     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5804                             | CODING_REQUIRE_DETECTION_MASK);
5805   else if (! EQ (eol_type, Qunix))
5806     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5807                             | CODING_REQUIRE_ENCODING_MASK);
5808   else
5809     coding->common_flags = 0;
5810   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5811     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5812   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5813     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5814   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5815     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5816
5817   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5818   coding->max_charset_id = SCHARS (val) - 1;
5819   coding->safe_charsets = SDATA (val);
5820   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5821   coding->carryover_bytes = 0;
5822
5823   coding_type = CODING_ATTR_TYPE (attrs);
5824   if (EQ (coding_type, Qundecided))
5825     {
5826       coding->detector = NULL;
5827       coding->decoder = decode_coding_raw_text;
5828       coding->encoder = encode_coding_raw_text;
5829       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5830     }
5831   else if (EQ (coding_type, Qiso_2022))
5832     {
5833       int i;
5834       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5835
5836       /* Invoke graphic register 0 to plane 0.  */
5837       CODING_ISO_INVOCATION (coding, 0) = 0;
5838       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5839       CODING_ISO_INVOCATION (coding, 1)
5840         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5841       /* Setup the initial status of designation.  */
5842       for (i = 0; i < 4; i++)
5843         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5844       /* Not single shifting initially.  */
5845       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5846       /* Beginning of buffer should also be regarded as bol. */
5847       CODING_ISO_BOL (coding) = 1;
5848       coding->detector = detect_coding_iso_2022;
5849       coding->decoder = decode_coding_iso_2022;
5850       coding->encoder = encode_coding_iso_2022;
5851       if (flags & CODING_ISO_FLAG_SAFE)
5852         coding->mode |= CODING_MODE_SAFE_ENCODING;
5853       coding->common_flags
5854         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5855             | CODING_REQUIRE_FLUSHING_MASK);
5856       if (flags & CODING_ISO_FLAG_COMPOSITION)
5857         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5858       if (flags & CODING_ISO_FLAG_DESIGNATION)
5859         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5860       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5861         {
5862           setup_iso_safe_charsets (attrs);
5863           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5864           coding->max_charset_id = SCHARS (val) - 1;
5865           coding->safe_charsets = SDATA (val);
5866         }
5867       CODING_ISO_FLAGS (coding) = flags;
5868       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5869       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5870       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5871       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5872     }
5873   else if (EQ (coding_type, Qcharset))
5874     {
5875       coding->detector = detect_coding_charset;
5876       coding->decoder = decode_coding_charset;
5877       coding->encoder = encode_coding_charset;
5878       coding->common_flags
5879         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5880     }
5881   else if (EQ (coding_type, Qutf_8))
5882     {
5883       val = AREF (attrs, coding_attr_utf_bom);
5884       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5885                                    : EQ (val, Qt) ? utf_with_bom
5886                                    : utf_without_bom);
5887       coding->detector = detect_coding_utf_8;
5888       coding->decoder = decode_coding_utf_8;
5889       coding->encoder = encode_coding_utf_8;
5890       coding->common_flags
5891         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5892       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5893         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5894     }
5895   else if (EQ (coding_type, Qutf_16))
5896     {
5897       val = AREF (attrs, coding_attr_utf_bom);
5898       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5899                                     : EQ (val, Qt) ? utf_with_bom
5900                                     : utf_without_bom);
5901       val = AREF (attrs, coding_attr_utf_16_endian);
5902       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5903                                        : utf_16_little_endian);
5904       CODING_UTF_16_SURROGATE (coding) = 0;
5905       coding->detector = detect_coding_utf_16;
5906       coding->decoder = decode_coding_utf_16;
5907       coding->encoder = encode_coding_utf_16;
5908       coding->common_flags
5909         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5910       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5911         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5912     }
5913   else if (EQ (coding_type, Qccl))
5914     {
5915       coding->detector = detect_coding_ccl;
5916       coding->decoder = decode_coding_ccl;
5917       coding->encoder = encode_coding_ccl;
5918       coding->common_flags
5919         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5920             | CODING_REQUIRE_FLUSHING_MASK);
5921     }
5922   else if (EQ (coding_type, Qemacs_mule))
5923     {
5924       coding->detector = detect_coding_emacs_mule;
5925       coding->decoder = decode_coding_emacs_mule;
5926       coding->encoder = encode_coding_emacs_mule;
5927       coding->common_flags
5928         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5929       coding->spec.emacs_mule.full_support = 1;
5930       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5931           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5932         {
5933           Lisp_Object tail, safe_charsets;
5934           int max_charset_id = 0;
5935
5936           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5937                tail = XCDR (tail))
5938             if (max_charset_id < XFASTINT (XCAR (tail)))
5939               max_charset_id = XFASTINT (XCAR (tail));
5940           safe_charsets = make_uninit_string (max_charset_id + 1);
5941           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5942           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5943                tail = XCDR (tail))
5944             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5945           coding->max_charset_id = max_charset_id;
5946           coding->safe_charsets = SDATA (safe_charsets);
5947           coding->spec.emacs_mule.full_support = 1;
5948         }
5949       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5950       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5951     }
5952   else if (EQ (coding_type, Qshift_jis))
5953     {
5954       coding->detector = detect_coding_sjis;
5955       coding->decoder = decode_coding_sjis;
5956       coding->encoder = encode_coding_sjis;
5957       coding->common_flags
5958         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5959     }
5960   else if (EQ (coding_type, Qbig5))
5961     {
5962       coding->detector = detect_coding_big5;
5963       coding->decoder = decode_coding_big5;
5964       coding->encoder = encode_coding_big5;
5965       coding->common_flags
5966         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5967     }
5968   else                          /* EQ (coding_type, Qraw_text) */
5969     {
5970       coding->detector = NULL;
5971       coding->decoder = decode_coding_raw_text;
5972       coding->encoder = encode_coding_raw_text;
5973       if (! EQ (eol_type, Qunix))
5974         {
5975           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5976           if (! VECTORP (eol_type))
5977             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5978         }
5979
5980     }
5981
5982   return;
5983 }
5984
5985 /* Return a list of charsets supported by CODING.  */
5986
5987 Lisp_Object
5988 coding_charset_list (coding)
5989      struct coding_system *coding;
5990 {
5991   Lisp_Object attrs, charset_list;
5992
5993   CODING_GET_INFO (coding, attrs, charset_list);
5994   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5995     {
5996       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5997
5998       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5999         charset_list = Viso_2022_charset_list;
6000     }
6001   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
6002     {
6003       charset_list = Vemacs_mule_charset_list;
6004     }
6005   return charset_list;
6006 }
6007
6008
6009 /* Return a list of charsets supported by CODING-SYSTEM.  */
6010
6011 Lisp_Object
6012 coding_system_charset_list (coding_system)
6013      Lisp_Object coding_system;
6014 {
6015   int id;
6016   Lisp_Object attrs, charset_list;
6017
6018   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
6019   attrs = CODING_ID_ATTRS (id);
6020
6021   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
6022     {
6023       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
6024
6025       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
6026         charset_list = Viso_2022_charset_list;
6027       else
6028         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6029     }
6030   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
6031     {
6032       charset_list = Vemacs_mule_charset_list;
6033     }
6034   else
6035     {
6036       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6037     }
6038   return charset_list;
6039 }
6040
6041
6042 /* Return raw-text or one of its subsidiaries that has the same
6043    eol_type as CODING-SYSTEM.  */
6044
6045 Lisp_Object
6046 raw_text_coding_system (coding_system)
6047      Lisp_Object coding_system;
6048 {
6049   Lisp_Object spec, attrs;
6050   Lisp_Object eol_type, raw_text_eol_type;
6051
6052   if (NILP (coding_system))
6053     return Qraw_text;
6054   spec = CODING_SYSTEM_SPEC (coding_system);
6055   attrs = AREF (spec, 0);
6056
6057   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6058     return coding_system;
6059
6060   eol_type = AREF (spec, 2);
6061   if (VECTORP (eol_type))
6062     return Qraw_text;
6063   spec = CODING_SYSTEM_SPEC (Qraw_text);
6064   raw_text_eol_type = AREF (spec, 2);
6065   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6066           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6067           : AREF (raw_text_eol_type, 2));
6068 }
6069
6070
6071 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6072    does, return one of the subsidiary that has the same eol-spec as
6073    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
6074    inherit end-of-line format from the system's setting
6075    (system_eol_type).  */
6076
6077 Lisp_Object
6078 coding_inherit_eol_type (coding_system, parent)
6079      Lisp_Object coding_system, parent;
6080 {
6081   Lisp_Object spec, eol_type;
6082
6083   if (NILP (coding_system))
6084     coding_system = Qraw_text;
6085   spec = CODING_SYSTEM_SPEC (coding_system);
6086   eol_type = AREF (spec, 2);
6087   if (VECTORP (eol_type))
6088     {
6089       Lisp_Object parent_eol_type;
6090
6091       if (! NILP (parent))
6092         {
6093           Lisp_Object parent_spec;
6094
6095           parent_spec = CODING_SYSTEM_SPEC (parent);
6096           parent_eol_type = AREF (parent_spec, 2);
6097         }
6098       else
6099         parent_eol_type = system_eol_type;
6100       if (EQ (parent_eol_type, Qunix))
6101         coding_system = AREF (eol_type, 0);
6102       else if (EQ (parent_eol_type, Qdos))
6103         coding_system = AREF (eol_type, 1);
6104       else if (EQ (parent_eol_type, Qmac))
6105         coding_system = AREF (eol_type, 2);
6106     }
6107   return coding_system;
6108 }
6109
6110 /* Emacs has a mechanism to automatically detect a coding system if it
6111    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
6112    it's impossible to distinguish some coding systems accurately
6113    because they use the same range of codes.  So, at first, coding
6114    systems are classified into the following categories:
6115
6116    o coding-category-emacs-mule
6117
6118         The category for a coding system which has the same code range
6119         as Emacs' internal format.  Assigned the coding-system (Lisp
6120         symbol) `emacs-mule' by default.
6121
6122    o coding-category-sjis
6123
6124         The category for a coding system which has the same code range
6125         as SJIS.  Assigned the coding-system (Lisp
6126         symbol) `japanese-shift-jis' by default.
6127
6128    o coding-category-iso-7
6129
6130         The category for a coding system which has the same code range
6131         as ISO2022 of 7-bit environment.  This doesn't use any locking
6132         shift and single shift functions.  This can encode/decode all
6133         charsets.  Assigned the coding-system (Lisp symbol)
6134         `iso-2022-7bit' by default.
6135
6136    o coding-category-iso-7-tight
6137
6138         Same as coding-category-iso-7 except that this can
6139         encode/decode only the specified charsets.
6140
6141    o coding-category-iso-8-1
6142
6143         The category for a coding system which has the same code range
6144         as ISO2022 of 8-bit environment and graphic plane 1 used only
6145         for DIMENSION1 charset.  This doesn't use any locking shift
6146         and single shift functions.  Assigned the coding-system (Lisp
6147         symbol) `iso-latin-1' by default.
6148
6149    o coding-category-iso-8-2
6150
6151         The category for a coding system which has the same code range
6152         as ISO2022 of 8-bit environment and graphic plane 1 used only
6153         for DIMENSION2 charset.  This doesn't use any locking shift
6154         and single shift functions.  Assigned the coding-system (Lisp
6155         symbol) `japanese-iso-8bit' by default.
6156
6157    o coding-category-iso-7-else
6158
6159         The category for a coding system which has the same code range
6160         as ISO2022 of 7-bit environment but uses locking shift or
6161         single shift functions.  Assigned the coding-system (Lisp
6162         symbol) `iso-2022-7bit-lock' by default.
6163
6164    o coding-category-iso-8-else
6165
6166         The category for a coding system which has the same code range
6167         as ISO2022 of 8-bit environment but uses locking shift or
6168         single shift functions.  Assigned the coding-system (Lisp
6169         symbol) `iso-2022-8bit-ss2' by default.
6170
6171    o coding-category-big5
6172
6173         The category for a coding system which has the same code range
6174         as BIG5.  Assigned the coding-system (Lisp symbol)
6175         `cn-big5' by default.
6176
6177    o coding-category-utf-8
6178
6179         The category for a coding system which has the same code range
6180         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6181         symbol) `utf-8' by default.
6182
6183    o coding-category-utf-16-be
6184
6185         The category for a coding system in which a text has an
6186         Unicode signature (cf. Unicode Standard) in the order of BIG
6187         endian at the head.  Assigned the coding-system (Lisp symbol)
6188         `utf-16-be' by default.
6189
6190    o coding-category-utf-16-le
6191
6192         The category for a coding system in which a text has an
6193         Unicode signature (cf. Unicode Standard) in the order of
6194         LITTLE endian at the head.  Assigned the coding-system (Lisp
6195         symbol) `utf-16-le' by default.
6196
6197    o coding-category-ccl
6198
6199         The category for a coding system of which encoder/decoder is
6200         written in CCL programs.  The default value is nil, i.e., no
6201         coding system is assigned.
6202
6203    o coding-category-binary
6204
6205         The category for a coding system not categorized in any of the
6206         above.  Assigned the coding-system (Lisp symbol)
6207         `no-conversion' by default.
6208
6209    Each of them is a Lisp symbol and the value is an actual
6210    `coding-system's (this is also a Lisp symbol) assigned by a user.
6211    What Emacs does actually is to detect a category of coding system.
6212    Then, it uses a `coding-system' assigned to it.  If Emacs can't
6213    decide only one possible category, it selects a category of the
6214    highest priority.  Priorities of categories are also specified by a
6215    user in a Lisp variable `coding-category-list'.
6216
6217 */
6218
/* Values describing which kind of end-of-line detect_eol has seen.  */
/* No end-of-line has been seen (the initial state).  */
#define EOL_SEEN_NONE   0
/* A LF ('\n') was seen.  */
#define EOL_SEEN_LF     1
/* A CR ('\r') not immediately followed by LF was seen.  */
#define EOL_SEEN_CR     2
/* A CR immediately followed by LF was seen.  */
#define EOL_SEEN_CRLF   4
6223
6224 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6225    SOURCE is encoded.  If CATEGORY is one of
6226    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6227    two-byte, else they are encoded by one-byte.
6228
6229    Return one of EOL_SEEN_XXX.  */
6230
/* detect_eol stops scanning once it has examined this many
   end-of-lines.  */
#define MAX_EOL_CHECK_COUNT 3
6232
6233 static int
6234 detect_eol (source, src_bytes, category)
6235      const unsigned char *source;
6236      EMACS_INT src_bytes;
6237      enum coding_category category;
6238 {
6239   const unsigned char *src = source, *src_end = src + src_bytes;
6240   unsigned char c;
6241   int total  = 0;
6242   int eol_seen = EOL_SEEN_NONE;
6243
6244   if ((1 << category) & CATEGORY_MASK_UTF_16)
6245     {
6246       int msb, lsb;
6247
6248       msb = category == (coding_category_utf_16_le
6249                          | coding_category_utf_16_le_nosig);
6250       lsb = 1 - msb;
6251
6252       while (src + 1 < src_end)
6253         {
6254           c = src[lsb];
6255           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6256             {
6257               int this_eol;
6258
6259               if (c == '\n')
6260                 this_eol = EOL_SEEN_LF;
6261               else if (src + 3 >= src_end
6262                        || src[msb + 2] != 0
6263                        || src[lsb + 2] != '\n')
6264                 this_eol = EOL_SEEN_CR;
6265               else
6266                 {
6267                   this_eol = EOL_SEEN_CRLF;
6268                   src += 2;
6269                 }
6270
6271               if (eol_seen == EOL_SEEN_NONE)
6272                 /* This is the first end-of-line.  */
6273                 eol_seen = this_eol;
6274               else if (eol_seen != this_eol)
6275                 {
6276                   /* The found type is different from what found before.
6277                      Allow for stray ^M characters in DOS EOL files.  */
6278                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6279                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6280                     eol_seen = EOL_SEEN_CRLF;
6281                   else
6282                     {
6283                       eol_seen = EOL_SEEN_LF;
6284                       break;
6285                     }
6286                 }
6287               if (++total == MAX_EOL_CHECK_COUNT)
6288                 break;
6289             }
6290           src += 2;
6291         }
6292     }
6293   else
6294     {
6295       while (src < src_end)
6296         {
6297           c = *src++;
6298           if (c == '\n' || c == '\r')
6299             {
6300               int this_eol;
6301
6302               if (c == '\n')
6303                 this_eol = EOL_SEEN_LF;
6304               else if (src >= src_end || *src != '\n')
6305                 this_eol = EOL_SEEN_CR;
6306               else
6307                 this_eol = EOL_SEEN_CRLF, src++;
6308
6309               if (eol_seen == EOL_SEEN_NONE)
6310                 /* This is the first end-of-line.  */
6311                 eol_seen = this_eol;
6312               else if (eol_seen != this_eol)
6313                 {
6314                   /* The found type is different from what found before.
6315                      Allow for stray ^M characters in DOS EOL files.  */
6316                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6317                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6318                     eol_seen = EOL_SEEN_CRLF;
6319                   else
6320                     {
6321                       eol_seen = EOL_SEEN_LF;
6322                       break;
6323                     }
6324                 }
6325               if (++total == MAX_EOL_CHECK_COUNT)
6326                 break;
6327             }
6328         }
6329     }
6330   return eol_seen;
6331 }
6332
6333
6334 static Lisp_Object
6335 adjust_coding_eol_type (coding, eol_seen)
6336      struct coding_system *coding;
6337      int eol_seen;
6338 {
6339   Lisp_Object eol_type;
6340
6341   eol_type = CODING_ID_EOL_TYPE (coding->id);
6342   if (eol_seen & EOL_SEEN_LF)
6343     {
6344       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6345       eol_type = Qunix;
6346     }
6347   else if (eol_seen & EOL_SEEN_CRLF)
6348     {
6349       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6350       eol_type = Qdos;
6351     }
6352   else if (eol_seen & EOL_SEEN_CR)
6353     {
6354       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6355       eol_type = Qmac;
6356     }
6357   return eol_type;
6358 }
6359
6360 /* Detect how a text specified in CODING is encoded.  If a coding
6361    system is detected, update fields of CODING by the detected coding
6362    system.  */
6363
6364 void
6365 detect_coding (coding)
6366      struct coding_system *coding;
6367 {
6368   const unsigned char *src, *src_end;
6369   int saved_mode = coding->mode;
6370
6371   coding->consumed = coding->consumed_char = 0;
6372   coding->produced = coding->produced_char = 0;
6373   coding_set_source (coding);
6374
6375   src_end = coding->source + coding->src_bytes;
6376   coding->head_ascii = 0;
6377
6378   /* If we have not yet decided the text encoding type, detect it
6379      now.  */
6380   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6381     {
6382       int c, i;
6383       struct coding_detection_info detect_info;
6384       int null_byte_found = 0, eight_bit_found = 0;
6385
6386       detect_info.checked = detect_info.found = detect_info.rejected = 0;
6387       for (src = coding->source; src < src_end; src++)
6388         {
6389           c = *src;
6390           if (c & 0x80)
6391             {
6392               eight_bit_found = 1;
6393               if (null_byte_found)
6394                 break;
6395             }
6396           else if (c < 0x20)
6397             {
6398               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6399                   && ! inhibit_iso_escape_detection
6400                   && ! detect_info.checked)
6401                 {
6402                   if (detect_coding_iso_2022 (coding, &detect_info))
6403                     {
6404                       /* We have scanned the whole data.  */
6405                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6406                         {
6407                           /* We didn't find an 8-bit code.  We may
6408                              have found a null-byte, but it's very
6409                              rare that a binary file confirm to
6410                              ISO-2022.  */
6411                           src = src_end;
6412                           coding->head_ascii = src - coding->source;
6413                         }
6414                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6415                       break;
6416                     }
6417                 }
6418               else if (! c && !inhibit_null_byte_detection)
6419                 {
6420                   null_byte_found = 1;
6421                   if (eight_bit_found)
6422                     break;
6423                 }
6424               if (! eight_bit_found)
6425                 coding->head_ascii++;
6426             }
6427           else if (! eight_bit_found)
6428             coding->head_ascii++;
6429         }
6430
6431       if (null_byte_found || eight_bit_found
6432           || coding->head_ascii < coding->src_bytes
6433           || detect_info.found)
6434         {
6435           enum coding_category category;
6436           struct coding_system *this;
6437
6438           if (coding->head_ascii == coding->src_bytes)
6439             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6440             for (i = 0; i < coding_category_raw_text; i++)
6441               {
6442                 category = coding_priorities[i];
6443                 this = coding_categories + category;
6444                 if (detect_info.found & (1 << category))
6445                   break;
6446               }
6447           else
6448             {
6449               if (null_byte_found)
6450                 {
6451                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6452                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6453                 }
6454               for (i = 0; i < coding_category_raw_text; i++)
6455                 {
6456                   category = coding_priorities[i];
6457                   this = coding_categories + category;
6458                   if (this->id < 0)
6459                     {
6460                       /* No coding system of this category is defined.  */
6461                       detect_info.rejected |= (1 << category);
6462                     }
6463                   else if (category >= coding_category_raw_text)
6464                     continue;
6465                   else if (detect_info.checked & (1 << category))
6466                     {
6467                       if (detect_info.found & (1 << category))
6468                         break;
6469                     }
6470                   else if ((*(this->detector)) (coding, &detect_info)
6471                            && detect_info.found & (1 << category))
6472                     {
6473                       if (category == coding_category_utf_16_auto)
6474                         {
6475                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6476                             category = coding_category_utf_16_le;
6477                           else
6478                             category = coding_category_utf_16_be;
6479                         }
6480                       break;
6481                     }
6482                 }
6483             }
6484
6485           if (i < coding_category_raw_text)
6486             setup_coding_system (CODING_ID_NAME (this->id), coding);
6487           else if (null_byte_found)
6488             setup_coding_system (Qno_conversion, coding);
6489           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6490                    == CATEGORY_MASK_ANY)
6491             setup_coding_system (Qraw_text, coding);
6492           else if (detect_info.rejected)
6493             for (i = 0; i < coding_category_raw_text; i++)
6494               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6495                 {
6496                   this = coding_categories + coding_priorities[i];
6497                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6498                   break;
6499                 }
6500         }
6501     }
6502   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6503            == coding_category_utf_8_auto)
6504     {
6505       Lisp_Object coding_systems;
6506       struct coding_detection_info detect_info;
6507
6508       coding_systems
6509         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6510       detect_info.found = detect_info.rejected = 0;
6511       coding->head_ascii = 0;
6512       if (CONSP (coding_systems)
6513           && detect_coding_utf_8 (coding, &detect_info))
6514         {
6515           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6516             setup_coding_system (XCAR (coding_systems), coding);
6517           else
6518             setup_coding_system (XCDR (coding_systems), coding);
6519         }
6520     }
6521   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6522            == coding_category_utf_16_auto)
6523     {
6524       Lisp_Object coding_systems;
6525       struct coding_detection_info detect_info;
6526
6527       coding_systems
6528         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6529       detect_info.found = detect_info.rejected = 0;
6530       coding->head_ascii = 0;
6531       if (CONSP (coding_systems)
6532           && detect_coding_utf_16 (coding, &detect_info))
6533         {
6534           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6535             setup_coding_system (XCAR (coding_systems), coding);
6536           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6537             setup_coding_system (XCDR (coding_systems), coding);
6538         }
6539     }
6540   coding->mode = saved_mode;
6541 }
6542
6543
6544 static void
6545 decode_eol (coding)
6546      struct coding_system *coding;
6547 {
6548   Lisp_Object eol_type;
6549   unsigned char *p, *pbeg, *pend;
6550
6551   eol_type = CODING_ID_EOL_TYPE (coding->id);
6552   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6553     return;
6554
6555   if (NILP (coding->dst_object))
6556     pbeg = coding->destination;
6557   else
6558     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6559   pend = pbeg + coding->produced;
6560
6561   if (VECTORP (eol_type))
6562     {
6563       int eol_seen = EOL_SEEN_NONE;
6564
6565       for (p = pbeg; p < pend; p++)
6566         {
6567           if (*p == '\n')
6568             eol_seen |= EOL_SEEN_LF;
6569           else if (*p == '\r')
6570             {
6571               if (p + 1 < pend && *(p + 1) == '\n')
6572                 {
6573                   eol_seen |= EOL_SEEN_CRLF;
6574                   p++;
6575                 }
6576               else
6577                 eol_seen |= EOL_SEEN_CR;
6578             }
6579         }
6580       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6581       if ((eol_seen & EOL_SEEN_CRLF) != 0
6582           && (eol_seen & EOL_SEEN_CR) != 0
6583           && (eol_seen & EOL_SEEN_LF) == 0)
6584         eol_seen = EOL_SEEN_CRLF;
6585       else if (eol_seen != EOL_SEEN_NONE
6586           && eol_seen != EOL_SEEN_LF
6587           && eol_seen != EOL_SEEN_CRLF
6588           && eol_seen != EOL_SEEN_CR)
6589         eol_seen = EOL_SEEN_LF;
6590       if (eol_seen != EOL_SEEN_NONE)
6591         eol_type = adjust_coding_eol_type (coding, eol_seen);
6592     }
6593
6594   if (EQ (eol_type, Qmac))
6595     {
6596       for (p = pbeg; p < pend; p++)
6597         if (*p == '\r')
6598           *p = '\n';
6599     }
6600   else if (EQ (eol_type, Qdos))
6601     {
6602       int n = 0;
6603
6604       if (NILP (coding->dst_object))
6605         {
6606           /* Start deleting '\r' from the tail to minimize the memory
6607              movement.  */
6608           for (p = pend - 2; p >= pbeg; p--)
6609             if (*p == '\r')
6610               {
6611                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6612                 n++;
6613               }
6614         }
6615       else
6616         {
6617           int pos_byte = coding->dst_pos_byte;
6618           int pos = coding->dst_pos;
6619           int pos_end = pos + coding->produced_char - 1;
6620
6621           while (pos < pos_end)
6622             {
6623               p = BYTE_POS_ADDR (pos_byte);
6624               if (*p == '\r' && p[1] == '\n')
6625                 {
6626                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6627                   n++;
6628                   pos_end--;
6629                 }
6630               pos++;
6631               if (coding->dst_multibyte)
6632                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6633               else
6634                 pos_byte++;
6635             }
6636         }
6637       coding->produced -= n;
6638       coding->produced_char -= n;
6639     }
6640 }
6641
6642
6643 /* Return a translation table (or list of them) from coding system
6644    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6645    decoding (ENCODEP is zero). */
6646
6647 static Lisp_Object
6648 get_translation_table (attrs, encodep, max_lookup)
6649      Lisp_Object attrs;
6650      int encodep, *max_lookup;
6651 {
6652   Lisp_Object standard, translation_table;
6653   Lisp_Object val;
6654
6655   if (NILP (Venable_character_translation))
6656     {
6657       if (max_lookup)
6658         *max_lookup = 0;
6659       return Qnil;
6660     }
6661   if (encodep)
6662     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6663       standard = Vstandard_translation_table_for_encode;
6664   else
6665     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6666       standard = Vstandard_translation_table_for_decode;
6667   if (NILP (translation_table))
6668     translation_table = standard;
6669   else
6670     {
6671       if (SYMBOLP (translation_table))
6672         translation_table = Fget (translation_table, Qtranslation_table);
6673       else if (CONSP (translation_table))
6674         {
6675           translation_table = Fcopy_sequence (translation_table);
6676           for (val = translation_table; CONSP (val); val = XCDR (val))
6677             if (SYMBOLP (XCAR (val)))
6678               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6679         }
6680       if (CHAR_TABLE_P (standard))
6681         {
6682           if (CONSP (translation_table))
6683             translation_table = nconc2 (translation_table,
6684                                         Fcons (standard, Qnil));
6685           else
6686             translation_table = Fcons (translation_table,
6687                                        Fcons (standard, Qnil));
6688         }
6689     }
6690
6691   if (max_lookup)
6692     {
6693       *max_lookup = 1;
6694       if (CHAR_TABLE_P (translation_table)
6695           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6696         {
6697           val = XCHAR_TABLE (translation_table)->extras[1];
6698           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6699             *max_lookup = XFASTINT (val);
6700         }
6701       else if (CONSP (translation_table))
6702         {
6703           Lisp_Object tail, val;
6704
6705           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6706             if (CHAR_TABLE_P (XCAR (tail))
6707                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6708               {
6709                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6710                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6711                   *max_lookup = XFASTINT (val);
6712               }
6713         }
6714     }
6715   return translation_table;
6716 }
6717
/* Look up character C in TABLE, which is a char-table or a list of
   char-tables (anything else yields no translation).

   If the value found is a character, C is replaced by it and TRANS is
   set to Qnil; with a list, the lookup then continues in the
   following tables with the new C.  If the value found is non-nil
   but not a character, it is left in TRANS (with a list, the lookup
   stops there).  Otherwise TRANS is Qnil.

   C and TRANS are assigned to, so they must be lvalues, and all
   arguments are evaluated more than once.  */
#define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
  do {                                                          \
    trans = Qnil;                                               \
    if (CHAR_TABLE_P (table))                                   \
      {                                                         \
        trans = CHAR_TABLE_REF (table, c);                      \
        if (CHARACTERP (trans))                                 \
          c = XFASTINT (trans), trans = Qnil;                   \
      }                                                         \
    else if (CONSP (table))                                     \
      {                                                         \
        Lisp_Object tail;                                       \
                                                                \
        for (tail = table; CONSP (tail); tail = XCDR (tail))    \
          if (CHAR_TABLE_P (XCAR (tail)))                       \
            {                                                   \
              trans = CHAR_TABLE_REF (XCAR (tail), c);          \
              if (CHARACTERP (trans))                           \
                c = XFASTINT (trans), trans = Qnil;             \
              else if (! NILP (trans))                          \
                break;                                          \
            }                                                   \
      }                                                         \
  } while (0)
6742
6743
6744 /* Return a translation of character(s) at BUF according to TRANS.
6745    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6746    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6747    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6748    translation is found, and Qnil if not found..
6749    If BUF is too short to lookup characters in FROM, return Qt.  */
6750
6751 static Lisp_Object
6752 get_translation (trans, buf, buf_end)
6753      Lisp_Object trans;
6754      int *buf, *buf_end;
6755 {
6756
6757   if (INTEGERP (trans))
6758     return trans;
6759   for (; CONSP (trans); trans = XCDR (trans))
6760     {
6761       Lisp_Object val = XCAR (trans);
6762       Lisp_Object from = XCAR (val);
6763       int len = ASIZE (from);
6764       int i;
6765
6766       for (i = 0; i < len; i++)
6767         {
6768           if (buf + i == buf_end)
6769             return Qt;
6770           if (XINT (AREF (from, i)) != buf[i])
6771             break;
6772         }
6773       if (i == len)
6774         return val;
6775     }
6776   return Qnil;
6777 }
6778
6779
/* Store the decoded text of CODING as bytes at CODING->destination,
   after the CODING->produced bytes already there.

   If CODING->chars_at_source is zero, the characters are taken from
   CODING->charbuf and each one is translated through
   TRANSLATION_TABLE.  When a translation needs more characters than
   CODING->charbuf holds (get_translation returns Qt), processing
   stops there unless LAST_BLOCK is nonzero.  Otherwise the bytes are
   taken from CODING->source.

   The destination is enlarged by alloc_destination when necessary.
   CODING->produced and CODING->produced_char are advanced, and if
   CODING->dst_object is a buffer and characters were produced,
   insert_from_gap is called for them.

   Return the number of elements left unprocessed at the end of
   CODING->charbuf (always zero when CODING->chars_at_source is
   nonzero).  */
static int
produce_chars (coding, translation_table, last_block)
     struct coding_system *coding;
     Lisp_Object translation_table;
     int last_block;
{
  unsigned char *dst = coding->destination + coding->produced;
  unsigned char *dst_end = coding->destination + coding->dst_bytes;
  EMACS_INT produced;
  EMACS_INT produced_chars = 0;
  int carryover = 0;

  if (! coding->chars_at_source)
    {
      /* Source characters are in coding->charbuf.  */
      int *buf = coding->charbuf;
      int *buf_end = buf + coding->charbuf_used;

      if (EQ (coding->src_object, coding->dst_object))
        {
          /* Source and destination are the same object; the
             destination must not grow past the consumed part of the
             source.  */
          coding_set_source (coding);
          dst_end = ((unsigned char *) coding->source) + coding->consumed;
        }

      while (buf < buf_end)
        {
          int c = *buf, i;

          if (c >= 0)
            {
              /* Number of characters consumed from BUF and number of
                 characters produced for them.  */
              int from_nchars = 1, to_nchars = 1;
              Lisp_Object trans = Qnil;

              LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
              if (! NILP (trans))
                {
                  trans = get_translation (trans, buf, buf_end);
                  if (INTEGERP (trans))
                    c = XINT (trans);
                  else if (CONSP (trans))
                    {
                      /* TRANS is ([FROM-CHAR ...] . TO).  */
                      from_nchars = ASIZE (XCAR (trans));
                      trans = XCDR (trans);
                      if (INTEGERP (trans))
                        c = XINT (trans);
                      else
                        {
                          to_nchars = ASIZE (trans);
                          c = XINT (AREF (trans, 0));
                        }
                    }
                  else if (EQ (trans, Qt) && ! last_block)
                    /* More characters are needed to decide; leave the
                       rest as carryover.  */
                    break;
                }

              if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
                {
                  dst = alloc_destination (coding,
                                           buf_end - buf
                                           + MAX_MULTIBYTE_LENGTH * to_nchars,
                                           dst);
                  if (EQ (coding->src_object, coding->dst_object))
                    {
                      coding_set_source (coding);
                      dst_end = (((unsigned char *) coding->source)
                                 + coding->consumed);
                    }
                  else
                    dst_end = coding->destination + coding->dst_bytes;
                }

              /* C already holds the first character to produce; the
                 following ones, if any, are read from the vector
                 TRANS.  */
              for (i = 0; i < to_nchars; i++)
                {
                  if (i > 0)
                    c = XINT (AREF (trans, i));
                  if (coding->dst_multibyte
                      || ! CHAR_BYTE8_P (c))
                    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
                  else
                    *dst++ = CHAR_TO_BYTE8 (c);
                }
              produced_chars += to_nchars;
              buf += from_nchars;
            }
          else
            /* This is an annotation datum.  (-C) is the length.  */
            buf += -c;
        }
      carryover = buf_end - buf;
    }
  else
    {
      /* Source characters are at coding->source.  */
      const unsigned char *src = coding->source;
      const unsigned char *src_end = src + coding->consumed;

      if (EQ (coding->dst_object, coding->src_object))
        dst_end = (unsigned char *) src;
      if (coding->src_multibyte != coding->dst_multibyte)
        {
          if (coding->src_multibyte)
            {
              /* NOTE(review): MULTIBYTEP, CONSUMED_CHARS, SRC_BASE and
                 the label no_more_source are not referenced directly
                 below; presumably they are used by ONE_MORE_BYTE,
                 which is defined elsewhere -- confirm before renaming
                 or removing them.  */
              int multibytep = 1;
              EMACS_INT consumed_chars = 0;

              while (1)
                {
                  const unsigned char *src_base = src;
                  int c;

                  ONE_MORE_BYTE (c);
                  if (dst == dst_end)
                    {
                      if (EQ (coding->src_object, coding->dst_object))
                        dst_end = (unsigned char *) src;
                      if (dst == dst_end)
                        {
                          /* Remember the position in the source as an
                             offset; the pointers are recomputed after
                             the reallocation.  */
                          EMACS_INT offset = src - coding->source;

                          dst = alloc_destination (coding, src_end - src + 1,
                                                   dst);
                          dst_end = coding->destination + coding->dst_bytes;
                          coding_set_source (coding);
                          src = coding->source + offset;
                          src_end = coding->source + coding->src_bytes;
                          if (EQ (coding->src_object, coding->dst_object))
                            dst_end = (unsigned char *) src;
                        }
                    }
                  *dst++ = c;
                  produced_chars++;
                }
            no_more_source:
              ;
            }
          else
            while (src < src_end)
              {
                /* NOTE(review): MULTIBYTEP is presumably read by
                   EMIT_ONE_BYTE, which is defined elsewhere --
                   confirm.  */
                int multibytep = 1;
                int c = *src++;

                if (dst >= dst_end - 1)
                  {
                    if (EQ (coding->src_object, coding->dst_object))
                      dst_end = (unsigned char *) src;
                    if (dst >= dst_end - 1)
                      {
                        EMACS_INT offset = src - coding->source;
                        EMACS_INT more_bytes;

                        if (EQ (coding->src_object, coding->dst_object))
                          more_bytes = ((src_end - src) / 2) + 2;
                        else
                          more_bytes = src_end - src + 2;
                        dst = alloc_destination (coding, more_bytes, dst);
                        dst_end = coding->destination + coding->dst_bytes;
                        coding_set_source (coding);
                        src = coding->source + offset;
                        src_end = coding->source + coding->src_bytes;
                        if (EQ (coding->src_object, coding->dst_object))
                          dst_end = (unsigned char *) src;
                      }
                  }
                EMIT_ONE_BYTE (c);
              }
        }
      else
        {
          /* Source and destination have the same multibyteness; the
             bytes are copied as they are.  */
          if (!EQ (coding->src_object, coding->dst_object))
            {
              EMACS_INT require = coding->src_bytes - coding->dst_bytes;

              if (require > 0)
                {
                  EMACS_INT offset = src - coding->source;

                  dst = alloc_destination (coding, require, dst);
                  coding_set_source (coding);
                  src = coding->source + offset;
                  src_end = coding->source + coding->src_bytes;
                }
            }
          produced_chars = coding->consumed_char;
          while (src < src_end)
            *dst++ = *src++;
        }
    }

  /* Number of bytes stored by this call.  */
  produced = dst - (coding->destination + coding->produced);
  if (BUFFERP (coding->dst_object) && produced_chars > 0)
    insert_from_gap (produced_chars, produced);
  coding->produced += produced;
  coding->produced_char += produced_chars;
  return carryover;
}
6975
6976 /* Compose text in CODING->object according to the annotation data at
6977    CHARBUF.  CHARBUF is an array:
6978      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6979  */
6980
6981 static INLINE void
6982 produce_composition (coding, charbuf, pos)
6983      struct coding_system *coding;
6984      int *charbuf;
6985      EMACS_INT pos;
6986 {
6987   int len;
6988   EMACS_INT to;
6989   enum composition_method method;
6990   Lisp_Object components;
6991
6992   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6993   to = pos + charbuf[2];
6994   method = (enum composition_method) (charbuf[4]);
6995
6996   if (method == COMPOSITION_RELATIVE)
6997     components = Qnil;
6998   else
6999     {
7000       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
7001       int i, j;
7002
7003       if (method == COMPOSITION_WITH_RULE)
7004         len = charbuf[2] * 3 - 2;
7005       charbuf += MAX_ANNOTATION_LENGTH;
7006       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
7007       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
7008         {
7009           if (charbuf[i] >= 0)
7010             args[j] = make_number (charbuf[i]);
7011           else
7012             {
7013               i++;
7014               args[j] = make_number (charbuf[i] % 0x100);
7015             }
7016         }
7017       components = (i == j ? Fstring (j, args) : Fvector (j, args));
7018     }
7019   compose_text (pos, to, components, Qnil, coding->dst_object);
7020 }
7021
7022
7023 /* Put `charset' property on text in CODING->object according to
7024    the annotation data at CHARBUF.  CHARBUF is an array:
7025      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
7026  */
7027
7028 static INLINE void
7029 produce_charset (coding, charbuf, pos)
7030      struct coding_system *coding;
7031      int *charbuf;
7032      EMACS_INT pos;
7033 {
7034   EMACS_INT from = pos - charbuf[2];
7035   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
7036
7037   Fput_text_property (make_number (from), make_number (pos),
7038                       Qcharset, CHARSET_NAME (charset),
7039                       coding->dst_object);
7040 }
7041
7042
/* Number of elements (ints) first requested for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the work area CODING->charbuf with alloca and record its
   size (in elements) in CODING->charbuf_size.  CHARBUF_SIZE elements
   are tried first, and the size is halved, while it is larger than
   1024, each time alloca yields NULL.

   If nothing could be allocated, CODING_RESULT_INSUFFICIENT_MEM is
   recorded and the macro executes `return coding->result;', i.e. it
   returns from the function that uses it.  As the memory comes from
   alloca, it belongs to the stack frame of that function.

   NOTE(review): common alloca implementations do not report failure
   by returning NULL, in which case the retry loop never iterates and
   the error path is never taken -- confirm for the supported
   platforms.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    coding->charbuf = NULL;                                             \
    while (size > 1024)                                                 \
      {                                                                 \
        coding->charbuf = (int *) alloca (sizeof (int) * size);         \
        if (coding->charbuf)                                            \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! coding->charbuf)                                              \
      {                                                                 \
        record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
        return coding->result;                                          \
      }                                                                 \
    coding->charbuf_size = size;                                        \
  } while (0)
7064
7065
7066 static void
7067 produce_annotation (coding, pos)
7068      struct coding_system *coding;
7069      EMACS_INT pos;
7070 {
7071   int *charbuf = coding->charbuf;
7072   int *charbuf_end = charbuf + coding->charbuf_used;
7073
7074   if (NILP (coding->dst_object))
7075     return;
7076
7077   while (charbuf < charbuf_end)
7078     {
7079       if (*charbuf >= 0)
7080         pos++, charbuf++;
7081       else
7082         {
7083           int len = -*charbuf;
7084
7085           if (len > 2)
7086             switch (charbuf[1])
7087               {
7088               case CODING_ANNOTATE_COMPOSITION_MASK:
7089                 produce_composition (coding, charbuf, pos);
7090                 break;
7091               case CODING_ANNOTATE_CHARSET_MASK:
7092                 produce_charset (coding, charbuf, pos);
7093                 break;
7094               }
7095           charbuf += len;
7096         }
7097     }
7098 }
7099
7100 /* Decode the data at CODING->src_object into CODING->dst_object.
7101    CODING->src_object is a buffer, a string, or nil.
7102    CODING->dst_object is a buffer.
7103
7104    If CODING->src_object is a buffer, it must be the current buffer.
7105    In this case, if CODING->src_pos is positive, it is a position of
7106    the source text in the buffer, otherwise, the source text is in the
7107    gap area of the buffer, and CODING->src_pos specifies the offset of
7108    the text from GPT (which must be the same as PT).  If this is the
7109    same buffer as CODING->dst_object, CODING->src_pos must be
7110    negative.
7111
7112    If CODING->src_object is a string, CODING->src_pos is an index to
7113    that string.
7114
7115    If CODING->src_object is nil, CODING->source must already point to
7116    the non-relocatable memory area.  In this case, CODING->src_pos is
7117    an offset from CODING->source.
7118
7119    The decoded data is inserted at the current point of the buffer
7120    CODING->dst_object.

        The consumed/produced counters, the result code and the error
        count of CODING are reset before decoding starts.  Source bytes
        left undecoded at the end are either flushed as raw bytes (when
        CODING_MODE_LAST_BLOCK is set) or saved in CODING->carryover.

        Return CODING->result.  Note that ALLOC_CONVERSION_WORK_AREA can
        return from this function early with that same value when it
        fails to obtain a work area.
7121 */
7122
7123 static int
7124 decode_coding (coding)
7125      struct coding_system *coding;
7126 {
7127   Lisp_Object attrs;
7128   Lisp_Object undo_list;
7129   Lisp_Object translation_table;
7130   struct ccl_spec cclspec;
7131   int carryover;
7132   int i;
7133
       /* If the source is buffer text that starts before the gap and
          extends beyond it, move the gap to the start of the source
          (presumably so that the source bytes become contiguous).  */
7134   if (BUFFERP (coding->src_object)
7135       && coding->src_pos > 0
7136       && coding->src_pos < GPT
7137       && coding->src_pos + coding->src_chars > GPT)
7138     move_gap_both (coding->src_pos, coding->src_pos_byte);
7139
       /* Make the destination buffer current with the gap at point, and
          set its undo list to t while decoding.  The saved list is put
          back at the end of this function, where the whole insertion is
          recorded by a single record_insert call.  */
7140   undo_list = Qt;
7141   if (BUFFERP (coding->dst_object))
7142     {
7143       if (current_buffer != XBUFFER (coding->dst_object))
7144         set_buffer_internal (XBUFFER (coding->dst_object));
7145       if (GPT != PT)
7146         move_gap_both (PT, PT_BYTE);
7147       undo_list = current_buffer->undo_list;
7148       current_buffer->undo_list = Qt;
7149     }
7150
7151   coding->consumed = coding->consumed_char = 0;
7152   coding->produced = coding->produced_char = 0;
7153   coding->chars_at_source = 0;
7154   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7155   coding->errors = 0;
7156
       /* Sets CODING->charbuf and CODING->charbuf_size; may return.  */
7157   ALLOC_CONVERSION_WORK_AREA (coding);
7158
7159   attrs = CODING_ID_ATTRS (coding->id);
7160   translation_table = get_translation_table (attrs, 0, NULL);
7161
7162   carryover = 0;
       /* The CCL decoder needs its spec; it lives in this stack frame.  */
7163   if (coding->decoder == decode_coding_ccl)
7164     {
7165       coding->spec.ccl = &cclspec;
7166       setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7167     }
       /* Each round decodes source bytes into CODING->charbuf and then
          writes the contents of charbuf to the destination.  */
7168   do
7169     {
           /* Destination position at which the output of this round
              starts; used as the base position for annotations.  */
7170       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7171
7172       coding_set_source (coding);
7173       coding->annotated = 0;
           /* Elements carried over from the previous round were moved
              to the head of charbuf at the bottom of the loop.  */
7174       coding->charbuf_used = carryover;
7175       (*(coding->decoder)) (coding);
7176       coding_set_destination (coding);
           /* The return value is the number of trailing elements of
              charbuf that must be kept for the next round.  */
7177       carryover = produce_chars (coding, translation_table, 0);
7178       if (coding->annotated)
7179         produce_annotation (coding, pos);
           /* Move the kept tail of charbuf to its head.  */
7180       for (i = 0; i < carryover; i++)
7181         coding->charbuf[i]
7182           = coding->charbuf[coding->charbuf_used - carryover + i];
7183     }
       /* Go on while the destination was too small, or while source
          bytes remain and the last round ended with success or with an
          invalid-source result.  */
7184   while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7185          || (coding->consumed < coding->src_bytes
7186              && (coding->result == CODING_RESULT_SUCCESS
7187                  || coding->result == CODING_RESULT_INVALID_SRC)));
7188
       /* Write out what is still carried over, this time calling
          produce_chars with 1 as its last argument.  */
7189   if (carryover > 0)
7190     {
7191       coding_set_destination (coding);
7192       coding->charbuf_used = carryover;
7193       produce_chars (coding, translation_table, 1);
7194     }
7195
       /* Deal with source bytes that the decoder did not consume.  */
7196   coding->carryover_bytes = 0;
7197   if (coding->consumed < coding->src_bytes)
7198     {
7199       int nbytes = coding->src_bytes - coding->consumed;
7200       const unsigned char *src;
7201
7202       coding_set_source (coding);
7203       coding_set_destination (coding);
7204       src = coding->source + coding->consumed;
7205
7206       if (coding->mode & CODING_MODE_LAST_BLOCK)
7207         {
7208           /* Flush out unprocessed data as binary chars.  We are sure
7209              that the number of data is less than the size of
7210              coding->charbuf.  */
7211           coding->charbuf_used = 0;
7212           coding->chars_at_source = 0;
7213
7214           while (nbytes-- > 0)
7215             {
7216               int c = *src++;
7217
                   /* Bytes with the high bit set are stored as the
                      corresponding raw-byte characters.  */
7218               if (c & 0x80)
7219                 c = BYTE8_TO_CHAR (c);
7220               coding->charbuf[coding->charbuf_used++] = c;
7221             }
               /* No translation table is applied to these bytes.  */
7222           produce_chars (coding, Qnil, 1);
7223         }
7224       else
7225         {
7226           /* Record unprocessed bytes in coding->carryover.  We are
7227              sure that the number of data is less than the size of
7228              coding->carryover.  */
7229           unsigned char *p = coding->carryover;
7230
               /* Clamp anyway, so the copy below cannot overrun.  */
7231           if (nbytes > sizeof coding->carryover)
7232             nbytes = sizeof coding->carryover;
7233           coding->carryover_bytes = nbytes;
7234           while (nbytes-- > 0)
7235             *p++ = *src++;
7236         }
           /* Either way, all source bytes now count as consumed.  */
7237       coding->consumed = coding->src_bytes;
7238     }
7239
       /* End-of-line decoding is done as a separate pass, unless the
          EOL type is unix or EOL conversion is inhibited.  */
7240   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7241       && !inhibit_eol_conversion)
7242     decode_eol (coding);
7243   if (BUFFERP (coding->dst_object))
7244     {
7245       current_buffer->undo_list = undo_list;
7246       record_insert (coding->dst_pos, coding->produced_char);
7247     }
7248   return coding->result;
7249 }
7250
7251
7252 /* Extract an annotation datum from a composition starting at POS and
7253    ending before LIMIT of CODING->src_object (buffer or string), store
7254    the data in BUF, set *STOP to a starting position of the next
7255    composition (if any) or to LIMIT, and return the address of the
7256    next element of BUF.
7257
7258    If such an annotation is not found, set *STOP to a starting
7259    position of a composition after POS (if any) or to LIMIT, and
7260    return BUF.  */
7261
7262 static INLINE int *
7263 handle_composition_annotation (pos, limit, coding, buf, stop)
7264      EMACS_INT pos, limit;
7265      struct coding_system *coding;
7266      int *buf;
7267      EMACS_INT *stop;
7268 {
7269   EMACS_INT start, end;
7270   Lisp_Object prop;
7271
       /* Three cases: no composition is found between POS and LIMIT (or
          the one found ends beyond LIMIT), so there is nothing to do up
          to LIMIT; a composition starts after POS, so stop there; or
          the composition found starts at or before POS.  */
7272   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7273       || end > limit)
7274     *stop = limit;
7275   else if (start > pos)
7276     *stop = start;
7277   else
7278     {
7279       if (start == pos)
7280         {
7281           /* We found a composition.  Store the corresponding
7282              annotation data in BUF.  */
               /* HEAD remembers the first element of the record so that
                  it can be adjusted after the components are added.  */
7283           int *head = buf;
7284           enum composition_method method = COMPOSITION_METHOD (prop);
7285           int nchars = COMPOSITION_LENGTH (prop);
7286
7287           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
               /* Any method other than the relative one also records
                  the components, one element per component.  */
7288           if (method != COMPOSITION_RELATIVE)
7289             {
7290               Lisp_Object components;
7291               int len, i, i_byte;
7292
7293               components = COMPOSITION_COMPONENTS (prop);
                   /* COMPONENTS may be a vector, a string, a single
                      integer or a list; anything else aborts.  */
7294               if (VECTORP (components))
7295                 {
7296                   len = XVECTOR (components)->size;
7297                   for (i = 0; i < len; i++)
7298                     *buf++ = XINT (AREF (components, i));
7299                 }
7300               else if (STRINGP (components))
7301                 {
7302                   len = SCHARS (components);
7303                   i = i_byte = 0;
7304                   while (i < len)
7305                     {
7306                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7307                       buf++;
7308                     }
7309                 }
7310               else if (INTEGERP (components))
7311                 {
7312                   len = 1;
7313                   *buf++ = XINT (components);
7314                 }
7315               else if (CONSP (components))
7316                 {
7317                   for (len = 0; CONSP (components);
7318                        len++, components = XCDR (components))
7319                     *buf++ = XINT (XCAR (components));
7320                 }
7321               else
7322                 abort ();
                   /* Account for the LEN added elements in the head of
                      the record.  Presumably the head holds a negated
                      record length, which is how produce_annotation
                      reads it -- TODO confirm against
                      ADD_COMPOSITION_DATA.  */
7323               *head -= len;
7324             }
7325         }
7326
           /* Look for the next composition, searching from the end of
              the one found above.  */
7327       if (find_composition (end, limit, &start, &end, &prop,
7328                             coding->src_object)
7329           && end <= limit)
7330         *stop = start;
7331       else
7332         *stop = limit;
7333     }
7334   return buf;
7335 }
7336
7337
7338 /* Extract an annotation datum from a text property `charset' at POS of
7339    CODING->src_object (buffer of string), store the data in BUF, set
7340    *STOP to the position where the value of `charset' property changes
7341    (limiting by LIMIT), and return the address of the next element of
7342    BUF.
7343
7344    If the property value is nil, set *STOP to the position where the
7345    property value is non-nil (limiting by LIMIT), and return BUF.  */
7346
7347 static INLINE int *
7348 handle_charset_annotation (pos, limit, coding, buf, stop)
7349      EMACS_INT pos, limit;
7350      struct coding_system *coding;
7351      int *buf;
7352      EMACS_INT *stop;
7353 {
7354   Lisp_Object val, next;
7355   int id;
7356
7357   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7358   if (! NILP (val) && CHARSETP (val))
7359     id = XINT (CHARSET_SYMBOL_ID (val));
7360   else
7361     id = -1;
7362   ADD_CHARSET_DATA (buf, 0, id);
7363   next = Fnext_single_property_change (make_number (pos), Qcharset,
7364                                        coding->src_object,
7365                                        make_number (limit));
7366   *stop = XINT (next);
7367   return buf;
7368 }
7369
7370
/* Read characters from the source text of CODING, starting after the
   part already consumed, and store them in CODING->charbuf.

   While reading, insert annotation data at the positions where it is
   due, convert end-of-line according to the EOL type of CODING
   (unless inhibit_eol_conversion is set), and apply
   TRANSLATION_TABLE if it is non-nil.  MAX_LOOKUP is the number of
   elements allocated for the look-ahead buffer handed to
   get_translation; it is used only when TRANSLATION_TABLE is non-nil.

   On return, CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used tell how much was read and stored, and
   CODING->chars_at_source is 0.  */
7371 static void
7372 consume_chars (coding, translation_table, max_lookup)
7373      struct coding_system *coding;
7374      Lisp_Object translation_table;
7375      int max_lookup;
7376 {
7377   int *buf = coding->charbuf;
7378   int *buf_end = coding->charbuf + coding->charbuf_size;
7379   const unsigned char *src = coding->source + coding->consumed;
7380   const unsigned char *src_end = coding->source + coding->src_bytes;
7381   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7382   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7383   int multibytep = coding->src_multibyte;
7384   Lisp_Object eol_type;
7385   int c;
       /* STOP is the next position at which annotations must be looked
          at; it is the smaller of the two per-kind stop positions.  */
7386   EMACS_INT stop, stop_composition, stop_charset;
7387   int *lookup_buf = NULL;
7388
7389   if (! NILP (translation_table))
7390     lookup_buf = alloca (sizeof (int) * max_lookup);
7391
       /* An EOL type that is still a vector is treated as unix.  */
7392   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7393   if (VECTORP (eol_type))
7394     eol_type = Qunix;
7395
7396   /* Note: composition handling is not yet implemented.  */
7397   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7398
       /* Without a source object there is nothing to annotate.
          Otherwise an annotation kind that is enabled must be checked
          right at POS.  */
7399   if (NILP (coding->src_object))
7400     stop = stop_composition = stop_charset = end_pos;
7401   else
7402     {
7403       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7404         stop = stop_composition = pos;
7405       else
7406         stop = stop_composition = end_pos;
7407       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7408         stop = stop_charset = pos;
7409       else
7410         stop_charset = end_pos;
7411     }
7412
7413   /* Compensate for CRLF and conversion.  */
7414   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7415   while (buf < buf_end)
7416     {
7417       Lisp_Object trans;
7418
7419       if (pos == stop)
7420         {
               /* The whole source has been read.  */
7421           if (pos == end_pos)
7422             break;
7423           if (pos == stop_composition)
7424             buf = handle_composition_annotation (pos, end_pos, coding,
7425                                                  buf, &stop_composition);
7426           if (pos == stop_charset)
7427             buf = handle_charset_annotation (pos, end_pos, coding,
7428                                              buf, &stop_charset);
7429           stop = (stop_composition < stop_charset
7430                   ? stop_composition : stop_charset);
7431         }
7432
           /* Fetch the next character C, advancing SRC and POS.  */
7433       if (! multibytep)
7434         {
7435           EMACS_INT bytes;
7436
               /* The raw-text and CCL encoders get the source bytes one
                  by one.  Otherwise a valid multibyte sequence is read
                  as one character, with POS advanced by its byte
                  length, and any other byte becomes a raw-byte
                  character.  */
7437           if (coding->encoder == encode_coding_raw_text
7438               || coding->encoder == encode_coding_ccl)
7439             c = *src++, pos++;
7440           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7441             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7442           else
7443             c = BYTE8_TO_CHAR (*src), src++, pos++;
7444         }
7445       else
7446         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
           /* With selective display, a carriage return in the source
              is handled as a newline.  */
7447       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7448         c = '\n';
           /* EOL conversion: for the dos type a '\r' is stored in front
              of the newline; for any other non-unix type the newline is
              replaced by '\r'.  */
7449       if (! EQ (eol_type, Qunix))
7450         {
7451           if (c == '\n')
7452             {
7453               if (EQ (eol_type, Qdos))
7454                 *buf++ = '\r';
7455               else
7456                 c = '\r';
7457             }
7458         }
7459
7460       trans = Qnil;
7461       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7462       if (NILP (trans))
7463         *buf++ = c;
7464       else
7465         {
               /* C has a translation.  Collect C and up to
                  MAX_LOOKUP - 1 following source characters, without
                  consuming them, and let get_translation pick the
                  entry to use.  */
7466           int from_nchars = 1, to_nchars = 1;
7467           int *lookup_buf_end;
7468           const unsigned char *p = src;
7469           int i;
7470
7471           lookup_buf[0] = c;
7472           for (i = 1; i < max_lookup && p < src_end; i++)
7473             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7474           lookup_buf_end = lookup_buf + i;
7475           trans = get_translation (trans, lookup_buf, lookup_buf_end);
               /* The result is a single character, or a cons whose car
                  is a vector (its size is the number of source
                  characters covered) and whose cdr is either a
                  character or a vector of characters.  */
7476           if (INTEGERP (trans))
7477             c = XINT (trans);
7478           else if (CONSP (trans))
7479             {
7480               from_nchars = ASIZE (XCAR (trans));
7481               trans = XCDR (trans);
7482               if (INTEGERP (trans))
7483                 c = XINT (trans);
7484               else
7485                 {
7486                   to_nchars = ASIZE (trans);
                       /* NOTE(review): SRC and POS have already been
                          advanced past C when this loop is left here or
                          at the `break' further below, so C looks to be
                          counted as consumed without being stored --
                          confirm whether this can happen in practice.  */
7487                   if (buf + to_nchars > buf_end)
7488                     break;
7489                   c = XINT (AREF (trans, 0));
7490                 }
7491             }
7492           else
7493             break;
7494           *buf++ = c;
7495           for (i = 1; i < to_nchars; i++)
7496             *buf++ = XINT (AREF (trans, i));
               /* Skip the other source characters covered by the
                  translation.  */
7497           for (i = 1; i < from_nchars; i++, pos++)
7498             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7499         }
7500     }
7501
7502   coding->consumed = src - coding->source;
7503   coding->consumed_char = pos - coding->src_pos;
7504   coding->charbuf_used = buf - coding->charbuf;
7505   coding->chars_at_source = 0;
7506 }
7507
7508
7509 /* Encode the text at CODING->src_object into CODING->dst_object.
7510    CODING->src_object is a buffer or a string.
7511    CODING->dst_object is a buffer or nil.
7512
7513    If CODING->src_object is a buffer, it must be the current buffer.
7514    In this case, if CODING->src_pos is positive, it is a position of
7515    the source text in the buffer, otherwise. the source text is in the
7516    gap area of the buffer, and coding->src_pos specifies the offset of
7517    the text from GPT (which must be the same as PT).  If this is the
7518    same buffer as CODING->dst_object, CODING->src_pos must be
7519    negative and CODING should not have `pre-write-conversion'.
7520
7521    If CODING->src_object is a string, CODING should not have
7522    `pre-write-conversion'.
7523
7524    If CODING->dst_object is a buffer, the encoded data is inserted at
7525    the current point of that buffer.
7526
7527    If CODING->dst_object is nil, the encoded data is placed at the
7528    memory area specified by CODING->destination.  */
7529
7530 static int
7531 encode_coding (coding)
7532      struct coding_system *coding;
7533 {
7534   Lisp_Object attrs;
7535   Lisp_Object translation_table;
7536   int max_lookup;
7537
7538   attrs = CODING_ID_ATTRS (coding->id);
7539   if (coding->encoder == encode_coding_raw_text)
7540     translation_table = Qnil, max_lookup = 0;
7541   else
7542     translation_table = get_translation_table (attrs, 1, &max_lookup);
7543
7544   if (BUFFERP (coding->dst_object))
7545     {
7546       set_buffer_internal (XBUFFER (coding->dst_object));
7547       coding->dst_multibyte
7548         = ! NILP (current_buffer->enable_multibyte_characters);
7549     }
7550
7551   coding->consumed = coding->consumed_char = 0;
7552   coding->produced = coding->produced_char = 0;
7553   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7554   coding->errors = 0;
7555
7556   ALLOC_CONVERSION_WORK_AREA (coding);
7557
7558   do {
7559     coding_set_source (coding);
7560     consume_chars (coding, translation_table, max_lookup);
7561     coding_set_destination (coding);
7562     (*(coding->encoder)) (coding);
7563   } while (coding->consumed_char < coding->src_chars);
7564
7565   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7566     insert_from_gap (coding->produced_char, coding->produced);
7567
7568   return (coding->result);
7569 }
7570
7571
7572 /* Name (or base name) of work buffer for code conversion.  */
7573 static Lisp_Object Vcode_conversion_workbuf_name;
7574
7575 /* A working buffer used by the top level conversion.  Once it is
7576    created, it is never destroyed.  It has the name
7577    Vcode_conversion_workbuf_name.  The other working buffers are
7578    destroyed after the use is finished, and their names are modified
7579    versions of Vcode_conversion_workbuf_name.  */
7580 static Lisp_Object Vcode_conversion_reused_workbuf;
7581
7582 /* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
        make_conversion_work_buffer increments this on every call, and
        code_conversion_restore resets it to 0 when it releases the
        reused buffer.  */
7583 static int reused_workbuf_in_use;
7584
7585
7586 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7587    multibyteness of returning buffer.  */
7588
7589 static Lisp_Object
7590 make_conversion_work_buffer (multibyte)
7591      int multibyte;
7592 {
7593   Lisp_Object name, workbuf;
7594   struct buffer *current;
7595
7596   if (reused_workbuf_in_use++)
7597     {
7598       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7599       workbuf = Fget_buffer_create (name);
7600     }
7601   else
7602     {
7603       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7604         Vcode_conversion_reused_workbuf
7605           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7606       workbuf = Vcode_conversion_reused_workbuf;
7607     }
7608   current = current_buffer;
7609   set_buffer_internal (XBUFFER (workbuf));
7610   /* We can't allow modification hooks to run in the work buffer.  For
7611      instance, directory_files_internal assumes that file decoding
7612      doesn't compile new regexps.  */
7613   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7614   Ferase_buffer ();
7615   current_buffer->undo_list = Qt;
7616   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7617   set_buffer_internal (current);
7618   return workbuf;
7619 }
7620
7621
7622 static Lisp_Object
7623 code_conversion_restore (arg)
7624      Lisp_Object arg;
7625 {
7626   Lisp_Object current, workbuf;
7627   struct gcpro gcpro1;
7628
7629   GCPRO1 (arg);
7630   current = XCAR (arg);
7631   workbuf = XCDR (arg);
7632   if (! NILP (workbuf))
7633     {
7634       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7635         reused_workbuf_in_use = 0;
7636       else if (! NILP (Fbuffer_live_p (workbuf)))
7637         Fkill_buffer (workbuf);
7638     }
7639   set_buffer_internal (XBUFFER (current));
7640   UNGCPRO;
7641   return Qnil;
7642 }
7643
7644 Lisp_Object
7645 code_conversion_save (with_work_buf, multibyte)
7646      int with_work_buf, multibyte;
7647 {
7648   Lisp_Object workbuf = Qnil;
7649
7650   if (with_work_buf)
7651     workbuf = make_conversion_work_buffer (multibyte);
7652   record_unwind_protect (code_conversion_restore,
7653                          Fcons (Fcurrent_buffer (), workbuf));
7654   return workbuf;
7655 }
7656
7657 int
7658 decode_coding_gap (coding, chars, bytes)
7659      struct coding_system *coding;
7660      EMACS_INT chars, bytes;
7661 {
7662   int count = specpdl_ptr - specpdl;
7663   Lisp_Object attrs;
7664
7665   code_conversion_save (0, 0);
7666
7667   coding->src_object = Fcurrent_buffer ();
7668   coding->src_chars = chars;
7669   coding->src_bytes = bytes;
7670   coding->src_pos = -chars;
7671   coding->src_pos_byte = -bytes;
7672   coding->src_multibyte = chars < bytes;
7673   coding->dst_object = coding->src_object;
7674   coding->dst_pos = PT;
7675   coding->dst_pos_byte = PT_BYTE;
7676   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7677
7678   if (CODING_REQUIRE_DETECTION (coding))
7679     detect_coding (coding);
7680
7681   coding->mode |= CODING_MODE_LAST_BLOCK;
7682   current_buffer->text->inhibit_shrinking = 1;
7683   decode_coding (coding);
7684   current_buffer->text->inhibit_shrinking = 0;
7685
7686   attrs = CODING_ID_ATTRS (coding->id);
7687   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7688     {
7689       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7690       Lisp_Object val;
7691
7692       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7693       val = call1 (CODING_ATTR_POST_READ (attrs),
7694                    make_number (coding->produced_char));
7695       CHECK_NATNUM (val);
7696       coding->produced_char += Z - prev_Z;
7697       coding->produced += Z_BYTE - prev_Z_BYTE;
7698     }
7699
7700   unbind_to (count, Qnil);
7701   return coding->result;
7702 }
7703
7704 int
7705 encode_coding_gap (coding, chars, bytes)
7706      struct coding_system *coding;
7707      EMACS_INT chars, bytes;
7708 {
7709   int count = specpdl_ptr - specpdl;
7710
7711   code_conversion_save (0, 0);
7712
7713   coding->src_object = Fcurrent_buffer ();
7714   coding->src_chars = chars;
7715   coding->src_bytes = bytes;
7716   coding->src_pos = -chars;
7717   coding->src_pos_byte = -bytes;
7718   coding->src_multibyte = chars < bytes;
7719   coding->dst_object = coding->src_object;
7720   coding->dst_pos = PT;
7721   coding->dst_pos_byte = PT_BYTE;
7722
7723   encode_coding (coding);
7724
7725   unbind_to (count, Qnil);
7726   return coding->result;
7727 }
7728
7729
7730 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7731    SRC_OBJECT into DST_OBJECT by coding context CODING.
7732
7733    SRC_OBJECT is a buffer, a string, or Qnil.
7734
7735    If it is a buffer, the text is at point of the buffer.  FROM and TO
7736    are positions in the buffer.
7737
7738    If it is a string, the text is at the beginning of the string.
7739    FROM and TO are indices to the string.
7740
7741    If it is nil, the text is at coding->source.  FROM and TO are
7742    indices to coding->source.
7743
7744    DST_OBJECT is a buffer, Qt, or Qnil.
7745
7746    If it is a buffer, the decoded text is inserted at point of the
7747    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7748    is deleted.
7749
7750    If it is Qt, a string is made from the decoded text, and
7751    set in CODING->dst_object.
7752
7753    If it is Qnil, the decoded text is stored at CODING->destination.
7754    The caller must allocate CODING->dst_bytes bytes at
7755    CODING->destination by xmalloc.  If the decoded text is longer than
7756    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7757  */
7758
7759 void
7760 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7761                       dst_object)
7762      struct coding_system *coding;
7763      Lisp_Object src_object;
7764      EMACS_INT from, from_byte, to, to_byte;
7765      Lisp_Object dst_object;
7766 {
7767   int count = specpdl_ptr - specpdl;
7768   unsigned char *destination;
7769   EMACS_INT dst_bytes;
7770   EMACS_INT chars = to - from;
7771   EMACS_INT bytes = to_byte - from_byte;
7772   Lisp_Object attrs;
7773   int saved_pt = -1, saved_pt_byte;
7774   int need_marker_adjustment = 0;
7775   Lisp_Object old_deactivate_mark;
7776
7777   old_deactivate_mark = Vdeactivate_mark;
7778
7779   if (NILP (dst_object))
7780     {
7781       destination = coding->destination;
7782       dst_bytes = coding->dst_bytes;
7783     }
7784
7785   coding->src_object = src_object;
7786   coding->src_chars = chars;
7787   coding->src_bytes = bytes;
7788   coding->src_multibyte = chars < bytes;
7789
7790   if (STRINGP (src_object))
7791     {
7792       coding->src_pos = from;
7793       coding->src_pos_byte = from_byte;
7794     }
7795   else if (BUFFERP (src_object))
7796     {
7797       set_buffer_internal (XBUFFER (src_object));
7798       if (from != GPT)
7799         move_gap_both (from, from_byte);
7800       if (EQ (src_object, dst_object))
7801         {
7802           struct Lisp_Marker *tail;
7803
7804           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7805             {
7806               tail->need_adjustment
7807                 = tail->charpos == (tail->insertion_type ? from : to);
7808               need_marker_adjustment |= tail->need_adjustment;
7809             }
7810           saved_pt = PT, saved_pt_byte = PT_BYTE;
7811           TEMP_SET_PT_BOTH (from, from_byte);
7812           current_buffer->text->inhibit_shrinking = 1;
7813           del_range_both (from, from_byte, to, to_byte, 1);
7814           coding->src_pos = -chars;
7815           coding->src_pos_byte = -bytes;
7816         }
7817       else
7818         {
7819           coding->src_pos = from;
7820           coding->src_pos_byte = from_byte;
7821         }
7822     }
7823
7824   if (CODING_REQUIRE_DETECTION (coding))
7825     detect_coding (coding);
7826   attrs = CODING_ID_ATTRS (coding->id);
7827
7828   if (EQ (dst_object, Qt)
7829       || (! NILP (CODING_ATTR_POST_READ (attrs))
7830           && NILP (dst_object)))
7831     {
7832       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7833       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7834       coding->dst_pos = BEG;
7835       coding->dst_pos_byte = BEG_BYTE;
7836     }
7837   else if (BUFFERP (dst_object))
7838     {
7839       code_conversion_save (0, 0);
7840       coding->dst_object = dst_object;
7841       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7842       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7843       coding->dst_multibyte
7844         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7845     }
7846   else
7847     {
7848       code_conversion_save (0, 0);
7849       coding->dst_object = Qnil;
7850       /* Most callers presume this will return a multibyte result, and they
7851          won't use `binary' or `raw-text' anyway, so let's not worry about
7852          CODING_FOR_UNIBYTE.  */
7853       coding->dst_multibyte = 1;
7854     }
7855
7856   decode_coding (coding);
7857
7858   if (BUFFERP (coding->dst_object))
7859     set_buffer_internal (XBUFFER (coding->dst_object));
7860
7861   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7862     {
7863       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7864       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7865       Lisp_Object val;
7866
7867       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7868       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7869               old_deactivate_mark);
7870       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7871                         make_number (coding->produced_char));
7872       UNGCPRO;
7873       CHECK_NATNUM (val);
7874       coding->produced_char += Z - prev_Z;
7875       coding->produced += Z_BYTE - prev_Z_BYTE;
7876     }
7877
7878   if (EQ (dst_object, Qt))
7879     {
7880       coding->dst_object = Fbuffer_string ();
7881     }
7882   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7883     {
7884       set_buffer_internal (XBUFFER (coding->dst_object));
7885       if (dst_bytes < coding->produced)
7886         {
7887           destination = xrealloc (destination, coding->produced);
7888           if (! destination)
7889             {
7890               record_conversion_result (coding,
7891                                         CODING_RESULT_INSUFFICIENT_MEM);
7892               unbind_to (count, Qnil);
7893               return;
7894             }
7895           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7896             move_gap_both (BEGV, BEGV_BYTE);
7897           bcopy (BEGV_ADDR, destination, coding->produced);
7898           coding->destination = destination;
7899         }
7900     }
7901
7902   if (saved_pt >= 0)
7903     {
7904       /* This is the case of:
7905          (BUFFERP (src_object) && EQ (src_object, dst_object))
7906          As we have moved PT while replacing the original buffer
7907          contents, we must recover it now.  */
7908       set_buffer_internal (XBUFFER (src_object));
7909       current_buffer->text->inhibit_shrinking = 0;
7910       if (saved_pt < from)
7911         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7912       else if (saved_pt < from + chars)
7913         TEMP_SET_PT_BOTH (from, from_byte);
7914       else if (! NILP (current_buffer->enable_multibyte_characters))
7915         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7916                           saved_pt_byte + (coding->produced - bytes));
7917       else
7918         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7919                           saved_pt_byte + (coding->produced - bytes));
7920
7921       if (need_marker_adjustment)
7922         {
7923           struct Lisp_Marker *tail;
7924
7925           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7926             if (tail->need_adjustment)
7927               {
7928                 tail->need_adjustment = 0;
7929                 if (tail->insertion_type)
7930                   {
7931                     tail->bytepos = from_byte;
7932                     tail->charpos = from;
7933                   }
7934                 else
7935                   {
7936                     tail->bytepos = from_byte + coding->produced;
7937                     tail->charpos
7938                       = (NILP (current_buffer->enable_multibyte_characters)
7939                          ? tail->bytepos : from + coding->produced_char);
7940                   }
7941               }
7942         }
7943     }
7944
7945   Vdeactivate_mark = old_deactivate_mark;
7946   unbind_to (count, coding->dst_object);
7947 }
7948
7949
7950 void
7951 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7952                       dst_object)
7953      struct coding_system *coding;
7954      Lisp_Object src_object;
7955      EMACS_INT from, from_byte, to, to_byte;
7956      Lisp_Object dst_object;
7957 {
7958   int count = specpdl_ptr - specpdl;
7959   EMACS_INT chars = to - from;
7960   EMACS_INT bytes = to_byte - from_byte;
7961   Lisp_Object attrs;
7962   int saved_pt = -1, saved_pt_byte;
7963   int need_marker_adjustment = 0;
7964   int kill_src_buffer = 0;
7965   Lisp_Object old_deactivate_mark;
7966
7967   old_deactivate_mark = Vdeactivate_mark;
7968
7969   coding->src_object = src_object;
7970   coding->src_chars = chars;
7971   coding->src_bytes = bytes;
7972   coding->src_multibyte = chars < bytes;
7973
7974   attrs = CODING_ID_ATTRS (coding->id);
7975
7976   if (EQ (src_object, dst_object))
7977     {
7978       struct Lisp_Marker *tail;
7979
7980       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7981         {
7982           tail->need_adjustment
7983             = tail->charpos == (tail->insertion_type ? from : to);
7984           need_marker_adjustment |= tail->need_adjustment;
7985         }
7986     }
7987
7988   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7989     {
7990       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7991       set_buffer_internal (XBUFFER (coding->src_object));
7992       if (STRINGP (src_object))
7993         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7994       else if (BUFFERP (src_object))
7995         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7996       else
7997         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7998
7999       if (EQ (src_object, dst_object))
8000         {
8001           set_buffer_internal (XBUFFER (src_object));
8002           saved_pt = PT, saved_pt_byte = PT_BYTE;
8003           del_range_both (from, from_byte, to, to_byte, 1);
8004           set_buffer_internal (XBUFFER (coding->src_object));
8005         }
8006
8007       {
8008         Lisp_Object args[3];
8009         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
8010
8011         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
8012                 old_deactivate_mark);
8013         args[0] = CODING_ATTR_PRE_WRITE (attrs);
8014         args[1] = make_number (BEG);
8015         args[2] = make_number (Z);
8016         safe_call (3, args);
8017         UNGCPRO;
8018       }
8019       if (XBUFFER (coding->src_object) != current_buffer)
8020         kill_src_buffer = 1;
8021       coding->src_object = Fcurrent_buffer ();
8022       if (BEG != GPT)
8023         move_gap_both (BEG, BEG_BYTE);
8024       coding->src_chars = Z - BEG;
8025       coding->src_bytes = Z_BYTE - BEG_BYTE;
8026       coding->src_pos = BEG;
8027       coding->src_pos_byte = BEG_BYTE;
8028       coding->src_multibyte = Z < Z_BYTE;
8029     }
8030   else if (STRINGP (src_object))
8031     {
8032       code_conversion_save (0, 0);
8033       coding->src_pos = from;
8034       coding->src_pos_byte = from_byte;
8035     }
8036   else if (BUFFERP (src_object))
8037     {
8038       code_conversion_save (0, 0);
8039       set_buffer_internal (XBUFFER (src_object));
8040       if (EQ (src_object, dst_object))
8041         {
8042           saved_pt = PT, saved_pt_byte = PT_BYTE;
8043           coding->src_object = del_range_1 (from, to, 1, 1);
8044           coding->src_pos = 0;
8045           coding->src_pos_byte = 0;
8046         }
8047       else
8048         {
8049           if (from < GPT && to >= GPT)
8050             move_gap_both (from, from_byte);
8051           coding->src_pos = from;
8052           coding->src_pos_byte = from_byte;
8053         }
8054     }
8055   else
8056     code_conversion_save (0, 0);
8057
8058   if (BUFFERP (dst_object))
8059     {
8060       coding->dst_object = dst_object;
8061       if (EQ (src_object, dst_object))
8062         {
8063           coding->dst_pos = from;
8064           coding->dst_pos_byte = from_byte;
8065         }
8066       else
8067         {
8068           struct buffer *current = current_buffer;
8069
8070           set_buffer_temp (XBUFFER (dst_object));
8071           coding->dst_pos = PT;
8072           coding->dst_pos_byte = PT_BYTE;
8073           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
8074           set_buffer_temp (current);
8075         }
8076       coding->dst_multibyte
8077         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
8078     }
8079   else if (EQ (dst_object, Qt))
8080     {
8081       coding->dst_object = Qnil;
8082       coding->dst_bytes = coding->src_chars;
8083       if (coding->dst_bytes == 0)
8084         coding->dst_bytes = 1;
8085       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
8086       coding->dst_multibyte = 0;
8087     }
8088   else
8089     {
8090       coding->dst_object = Qnil;
8091       coding->dst_multibyte = 0;
8092     }
8093
8094   encode_coding (coding);
8095
8096   if (EQ (dst_object, Qt))
8097     {
8098       if (BUFFERP (coding->dst_object))
8099         coding->dst_object = Fbuffer_string ();
8100       else
8101         {
8102           coding->dst_object
8103             = make_unibyte_string ((char *) coding->destination,
8104                                    coding->produced);
8105           xfree (coding->destination);
8106         }
8107     }
8108
8109   if (saved_pt >= 0)
8110     {
8111       /* This is the case of:
8112          (BUFFERP (src_object) && EQ (src_object, dst_object))
8113          As we have moved PT while replacing the original buffer
8114          contents, we must recover it now.  */
8115       set_buffer_internal (XBUFFER (src_object));
8116       if (saved_pt < from)
8117         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8118       else if (saved_pt < from + chars)
8119         TEMP_SET_PT_BOTH (from, from_byte);
8120       else if (! NILP (current_buffer->enable_multibyte_characters))
8121         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8122                           saved_pt_byte + (coding->produced - bytes));
8123       else
8124         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8125                           saved_pt_byte + (coding->produced - bytes));
8126
8127       if (need_marker_adjustment)
8128         {
8129           struct Lisp_Marker *tail;
8130
8131           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8132             if (tail->need_adjustment)
8133               {
8134                 tail->need_adjustment = 0;
8135                 if (tail->insertion_type)
8136                   {
8137                     tail->bytepos = from_byte;
8138                     tail->charpos = from;
8139                   }
8140                 else
8141                   {
8142                     tail->bytepos = from_byte + coding->produced;
8143                     tail->charpos
8144                       = (NILP (current_buffer->enable_multibyte_characters)
8145                          ? tail->bytepos : from + coding->produced_char);
8146                   }
8147               }
8148         }
8149     }
8150
8151   if (kill_src_buffer)
8152     Fkill_buffer (coding->src_object);
8153
8154   Vdeactivate_mark = old_deactivate_mark;
8155   unbind_to (count, Qnil);
8156 }
8157
8158
8159 Lisp_Object
8160 preferred_coding_system ()
8161 {
8162   int id = coding_categories[coding_priorities[0]].id;
8163
8164   return CODING_ID_NAME (id);
8165 }
8166
8167 \f
8168 #ifdef emacs
8169 /*** 8. Emacs Lisp library functions ***/
8170
8171 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8172        doc: /* Return t if OBJECT is nil or a coding-system.
8173 See the documentation of `define-coding-system' for information
8174 about coding-system objects.  */)
8175      (object)
8176      Lisp_Object object;
8177 {
8178   if (NILP (object)
8179       || CODING_SYSTEM_ID (object) >= 0)
8180     return Qt;
8181   if (! SYMBOLP (object)
8182       || NILP (Fget (object, Qcoding_system_define_form)))
8183     return Qnil;
8184   return Qt;
8185 }
8186
8187 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8188        Sread_non_nil_coding_system, 1, 1, 0,
8189        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8190      (prompt)
8191      Lisp_Object prompt;
8192 {
8193   Lisp_Object val;
8194   do
8195     {
8196       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8197                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8198     }
8199   while (SCHARS (val) == 0);
8200   return (Fintern (val, Qnil));
8201 }
8202
8203 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8204        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8205 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8206 Ignores case when completing coding systems (all Emacs coding systems
8207 are lower-case).  */)
8208      (prompt, default_coding_system)
8209      Lisp_Object prompt, default_coding_system;
8210 {
8211   Lisp_Object val;
8212   int count = SPECPDL_INDEX ();
8213
8214   if (SYMBOLP (default_coding_system))
8215     default_coding_system = SYMBOL_NAME (default_coding_system);
8216   specbind (Qcompletion_ignore_case, Qt);
8217   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8218                           Qt, Qnil, Qcoding_system_history,
8219                           default_coding_system, Qnil);
8220   unbind_to (count, Qnil);
8221   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8222 }
8223
8224 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8225        1, 1, 0,
8226        doc: /* Check validity of CODING-SYSTEM.
8227 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8228 It is valid if it is nil or a symbol defined as a coding system by the
8229 function `define-coding-system'.  */)
8230   (coding_system)
8231      Lisp_Object coding_system;
8232 {
8233   Lisp_Object define_form;
8234
8235   define_form = Fget (coding_system, Qcoding_system_define_form);
8236   if (! NILP (define_form))
8237     {
8238       Fput (coding_system, Qcoding_system_define_form, Qnil);
8239       safe_eval (define_form);
8240     }
8241   if (!NILP (Fcoding_system_p (coding_system)))
8242     return coding_system;
8243   xsignal1 (Qcoding_system_error, coding_system);
8244 }
8245
8246 \f
/* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
   HIGHEST is nonzero, return the coding system of the highest
   priority among the detected coding systems.  Otherwise return a
   list of detected coding systems sorted by their priorities.  If
   MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
   multibyte form but contain only ASCII and eight-bit chars.
   Otherwise, the bytes are raw bytes.

   CODING-SYSTEM controls the detection as below:

   If it is nil, detect both text-format and eol-format.  If the
   text-format part of CODING-SYSTEM is already specified
   (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
   part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
   detect only text-format.  */
8262
8263 Lisp_Object
8264 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
8265                       coding_system)
8266      const unsigned char *src;
8267      EMACS_INT src_chars, src_bytes;
8268      int highest;
8269      int multibytep;
8270      Lisp_Object coding_system;
8271 {
8272   const unsigned char *src_end = src + src_bytes;
8273   Lisp_Object attrs, eol_type;
8274   Lisp_Object val = Qnil;
8275   struct coding_system coding;
8276   int id;
8277   struct coding_detection_info detect_info;
8278   enum coding_category base_category;
8279   int null_byte_found = 0, eight_bit_found = 0;
8280
8281   if (NILP (coding_system))
8282     coding_system = Qundecided;
8283   setup_coding_system (coding_system, &coding);
8284   attrs = CODING_ID_ATTRS (coding.id);
8285   eol_type = CODING_ID_EOL_TYPE (coding.id);
8286   coding_system = CODING_ATTR_BASE_NAME (attrs);
8287
8288   coding.source = src;
8289   coding.src_chars = src_chars;
8290   coding.src_bytes = src_bytes;
8291   coding.src_multibyte = multibytep;
8292   coding.consumed = 0;
8293   coding.mode |= CODING_MODE_LAST_BLOCK;
8294   coding.head_ascii = 0;
8295
8296   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8297
8298   /* At first, detect text-format if necessary.  */
8299   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8300   if (base_category == coding_category_undecided)
8301     {
8302       enum coding_category category;
8303       struct coding_system *this;
8304       int c, i;
8305
8306       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
8307       for (; src < src_end; src++)
8308         {
8309           c = *src;
8310           if (c & 0x80)
8311             {
8312               eight_bit_found = 1;
8313               if (null_byte_found)
8314                 break;
8315             }
8316           else if (c < 0x20)
8317             {
8318               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8319                   && ! inhibit_iso_escape_detection
8320                   && ! detect_info.checked)
8321                 {
8322                   if (detect_coding_iso_2022 (&coding, &detect_info))
8323                     {
8324                       /* We have scanned the whole data.  */
8325                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8326                         {
8327                           /* We didn't find an 8-bit code.  We may
8328                              have found a null-byte, but it's very
8329                              rare that a binary file confirm to
8330                              ISO-2022.  */
8331                           src = src_end;
8332                           coding.head_ascii = src - coding.source;
8333                         }
8334                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8335                       break;
8336                     }
8337                 }
8338               else if (! c && !inhibit_null_byte_detection)
8339                 {
8340                   null_byte_found = 1;
8341                   if (eight_bit_found)
8342                     break;
8343                 }
8344               if (! eight_bit_found)
8345                 coding.head_ascii++;
8346             }
8347           else if (! eight_bit_found)
8348             coding.head_ascii++;
8349         }
8350
8351       if (null_byte_found || eight_bit_found
8352           || coding.head_ascii < coding.src_bytes
8353           || detect_info.found)
8354         {
8355           if (coding.head_ascii == coding.src_bytes)
8356             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
8357             for (i = 0; i < coding_category_raw_text; i++)
8358               {
8359                 category = coding_priorities[i];
8360                 this = coding_categories + category;
8361                 if (detect_info.found & (1 << category))
8362                   break;
8363               }
8364           else
8365             {
8366               if (null_byte_found)
8367                 {
8368                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8369                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8370                 }
8371               for (i = 0; i < coding_category_raw_text; i++)
8372                 {
8373                   category = coding_priorities[i];
8374                   this = coding_categories + category;
8375
8376                   if (this->id < 0)
8377                     {
8378                       /* No coding system of this category is defined.  */
8379                       detect_info.rejected |= (1 << category);
8380                     }
8381                   else if (category >= coding_category_raw_text)
8382                     continue;
8383                   else if (detect_info.checked & (1 << category))
8384                     {
8385                       if (highest
8386                           && (detect_info.found & (1 << category)))
8387                         break;
8388                     }
8389                   else if ((*(this->detector)) (&coding, &detect_info)
8390                            && highest
8391                            && (detect_info.found & (1 << category)))
8392                     {
8393                       if (category == coding_category_utf_16_auto)
8394                         {
8395                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8396                             category = coding_category_utf_16_le;
8397                           else
8398                             category = coding_category_utf_16_be;
8399                         }
8400                       break;
8401                     }
8402                 }
8403             }
8404         }
8405
8406       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8407           || null_byte_found)
8408         {
8409           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8410           id = CODING_SYSTEM_ID (Qno_conversion);
8411           val = Fcons (make_number (id), Qnil);
8412         }
8413       else if (! detect_info.rejected && ! detect_info.found)
8414         {
8415           detect_info.found = CATEGORY_MASK_ANY;
8416           id = coding_categories[coding_category_undecided].id;
8417           val = Fcons (make_number (id), Qnil);
8418         }
8419       else if (highest)
8420         {
8421           if (detect_info.found)
8422             {
8423               detect_info.found = 1 << category;
8424               val = Fcons (make_number (this->id), Qnil);
8425             }
8426           else
8427             for (i = 0; i < coding_category_raw_text; i++)
8428               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8429                 {
8430                   detect_info.found = 1 << coding_priorities[i];
8431                   id = coding_categories[coding_priorities[i]].id;
8432                   val = Fcons (make_number (id), Qnil);
8433                   break;
8434                 }
8435         }
8436       else
8437         {
8438           int mask = detect_info.rejected | detect_info.found;
8439           int found = 0;
8440
8441           for (i = coding_category_raw_text - 1; i >= 0; i--)
8442             {
8443               category = coding_priorities[i];
8444               if (! (mask & (1 << category)))
8445                 {
8446                   found |= 1 << category;
8447                   id = coding_categories[category].id;
8448                   if (id >= 0)
8449                     val = Fcons (make_number (id), val);
8450                 }
8451             }
8452           for (i = coding_category_raw_text - 1; i >= 0; i--)
8453             {
8454               category = coding_priorities[i];
8455               if (detect_info.found & (1 << category))
8456                 {
8457                   id = coding_categories[category].id;
8458                   val = Fcons (make_number (id), val);
8459                 }
8460             }
8461           detect_info.found |= found;
8462         }
8463     }
8464   else if (base_category == coding_category_utf_8_auto)
8465     {
8466       if (detect_coding_utf_8 (&coding, &detect_info))
8467         {
8468           struct coding_system *this;
8469
8470           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8471             this = coding_categories + coding_category_utf_8_sig;
8472           else
8473             this = coding_categories + coding_category_utf_8_nosig;
8474           val = Fcons (make_number (this->id), Qnil);
8475         }
8476     }
8477   else if (base_category == coding_category_utf_16_auto)
8478     {
8479       if (detect_coding_utf_16 (&coding, &detect_info))
8480         {
8481           struct coding_system *this;
8482
8483           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8484             this = coding_categories + coding_category_utf_16_le;
8485           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8486             this = coding_categories + coding_category_utf_16_be;
8487           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8488             this = coding_categories + coding_category_utf_16_be_nosig;
8489           else
8490             this = coding_categories + coding_category_utf_16_le_nosig;
8491           val = Fcons (make_number (this->id), Qnil);
8492         }
8493     }
8494   else
8495     {
8496       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8497       val = Fcons (make_number (coding.id), Qnil);
8498     }
8499
8500   /* Then, detect eol-format if necessary.  */
8501   {
8502     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8503     Lisp_Object tail;
8504
8505     if (VECTORP (eol_type))
8506       {
8507         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8508           {
8509             if (null_byte_found)
8510               normal_eol = EOL_SEEN_LF;
8511             else
8512               normal_eol = detect_eol (coding.source, src_bytes,
8513                                        coding_category_raw_text);
8514           }
8515         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8516                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8517           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8518                                       coding_category_utf_16_be);
8519         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8520                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8521           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8522                                       coding_category_utf_16_le);
8523       }
8524     else
8525       {
8526         if (EQ (eol_type, Qunix))
8527           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8528         else if (EQ (eol_type, Qdos))
8529           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8530         else
8531           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8532       }
8533
8534     for (tail = val; CONSP (tail); tail = XCDR (tail))
8535       {
8536         enum coding_category category;
8537         int this_eol;
8538
8539         id = XINT (XCAR (tail));
8540         attrs = CODING_ID_ATTRS (id);
8541         category = XINT (CODING_ATTR_CATEGORY (attrs));
8542         eol_type = CODING_ID_EOL_TYPE (id);
8543         if (VECTORP (eol_type))
8544           {
8545             if (category == coding_category_utf_16_be
8546                 || category == coding_category_utf_16_be_nosig)
8547               this_eol = utf_16_be_eol;
8548             else if (category == coding_category_utf_16_le
8549                      || category == coding_category_utf_16_le_nosig)
8550               this_eol = utf_16_le_eol;
8551             else
8552               this_eol = normal_eol;
8553
8554             if (this_eol == EOL_SEEN_LF)
8555               XSETCAR (tail, AREF (eol_type, 0));
8556             else if (this_eol == EOL_SEEN_CRLF)
8557               XSETCAR (tail, AREF (eol_type, 1));
8558             else if (this_eol == EOL_SEEN_CR)
8559               XSETCAR (tail, AREF (eol_type, 2));
8560             else
8561               XSETCAR (tail, CODING_ID_NAME (id));
8562           }
8563         else
8564           XSETCAR (tail, CODING_ID_NAME (id));
8565       }
8566   }
8567
8568   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8569 }
8570
8571
8572 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8573        2, 3, 0,
8574        doc: /* Detect coding system of the text in the region between START and END.
8575 Return a list of possible coding systems ordered by priority.
8576 The coding systems to try and their priorities follows what
8577 the function `coding-system-priority-list' (which see) returns.
8578
8579 If only ASCII characters are found (except for such ISO-2022 control
8580 characters as ESC), it returns a list of single element `undecided'
8581 or its subsidiary coding system according to a detected end-of-line
8582 format.
8583
8584 If optional argument HIGHEST is non-nil, return the coding system of
8585 highest priority.  */)
8586      (start, end, highest)
8587      Lisp_Object start, end, highest;
8588 {
8589   int from, to;
8590   int from_byte, to_byte;
8591
8592   CHECK_NUMBER_COERCE_MARKER (start);
8593   CHECK_NUMBER_COERCE_MARKER (end);
8594
8595   validate_region (&start, &end);
8596   from = XINT (start), to = XINT (end);
8597   from_byte = CHAR_TO_BYTE (from);
8598   to_byte = CHAR_TO_BYTE (to);
8599
8600   if (from < GPT && to >= GPT)
8601     move_gap_both (to, to_byte);
8602
8603   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8604                                to - from, to_byte - from_byte,
8605                                !NILP (highest),
8606                                !NILP (current_buffer
8607                                       ->enable_multibyte_characters),
8608                                Qnil);
8609 }
8610
8611 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8612        1, 2, 0,
8613        doc: /* Detect coding system of the text in STRING.
8614 Return a list of possible coding systems ordered by priority.
8615 The coding systems to try and their priorities follows what
8616 the function `coding-system-priority-list' (which see) returns.
8617
8618 If only ASCII characters are found (except for such ISO-2022 control
8619 characters as ESC), it returns a list of single element `undecided'
8620 or its subsidiary coding system according to a detected end-of-line
8621 format.
8622
8623 If optional argument HIGHEST is non-nil, return the coding system of
8624 highest priority.  */)
8625      (string, highest)
8626      Lisp_Object string, highest;
8627 {
8628   CHECK_STRING (string);
8629
8630   return detect_coding_system (SDATA (string),
8631                                SCHARS (string), SBYTES (string),
8632                                !NILP (highest), STRING_MULTIBYTE (string),
8633                                Qnil);
8634 }
8635
8636
8637 static INLINE int
8638 char_encodable_p (c, attrs)
8639      int c;
8640      Lisp_Object attrs;
8641 {
8642   Lisp_Object tail;
8643   struct charset *charset;
8644   Lisp_Object translation_table;
8645
8646   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8647   if (! NILP (translation_table))
8648     c = translate_char (translation_table, c);
8649   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8650        CONSP (tail); tail = XCDR (tail))
8651     {
8652       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8653       if (CHAR_CHARSET_P (c, charset))
8654         break;
8655     }
8656   return (! NILP (tail));
8657 }
8658
8659
8660 /* Return a list of coding systems that safely encode the text between
8661    START and END.  If EXCLUDE is non-nil, it is a list of coding
8662    systems not to check.  The returned list doesn't contain any such
8663    coding systems.  In any case, if the text contains only ASCII or is
8664    unibyte, return t.  */
8665
8666 DEFUN ("find-coding-systems-region-internal",
8667        Ffind_coding_systems_region_internal,
8668        Sfind_coding_systems_region_internal, 2, 3, 0,
8669        doc: /* Internal use only.  */)
8670      (start, end, exclude)
8671      Lisp_Object start, end, exclude;
8672 {
8673   Lisp_Object coding_attrs_list, safe_codings;
8674   EMACS_INT start_byte, end_byte;
8675   const unsigned char *p, *pbeg, *pend;
8676   int c;
8677   Lisp_Object tail, elt, work_table;
8678
8679   if (STRINGP (start))
8680     {
8681       if (!STRING_MULTIBYTE (start)
8682           || SCHARS (start) == SBYTES (start))
8683         return Qt;
8684       start_byte = 0;
8685       end_byte = SBYTES (start);
8686     }
8687   else
8688     {
8689       CHECK_NUMBER_COERCE_MARKER (start);
8690       CHECK_NUMBER_COERCE_MARKER (end);
8691       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8692         args_out_of_range (start, end);
8693       if (NILP (current_buffer->enable_multibyte_characters))
8694         return Qt;
8695       start_byte = CHAR_TO_BYTE (XINT (start));
8696       end_byte = CHAR_TO_BYTE (XINT (end));
8697       if (XINT (end) - XINT (start) == end_byte - start_byte)
8698         return Qt;
8699
8700       if (XINT (start) < GPT && XINT (end) > GPT)
8701         {
8702           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8703             move_gap_both (XINT (start), start_byte);
8704           else
8705             move_gap_both (XINT (end), end_byte);
8706         }
8707     }
8708
8709   coding_attrs_list = Qnil;
8710   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8711     if (NILP (exclude)
8712         || NILP (Fmemq (XCAR (tail), exclude)))
8713       {
8714         Lisp_Object attrs;
8715
8716         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8717         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8718             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8719           {
8720             ASET (attrs, coding_attr_trans_tbl,
8721                   get_translation_table (attrs, 1, NULL));
8722             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8723           }
8724       }
8725
8726   if (STRINGP (start))
8727     p = pbeg = SDATA (start);
8728   else
8729     p = pbeg = BYTE_POS_ADDR (start_byte);
8730   pend = p + (end_byte - start_byte);
8731
8732   while (p < pend && ASCII_BYTE_P (*p)) p++;
8733   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8734
8735   work_table = Fmake_char_table (Qnil, Qnil);
8736   while (p < pend)
8737     {
8738       if (ASCII_BYTE_P (*p))
8739         p++;
8740       else
8741         {
8742           c = STRING_CHAR_ADVANCE (p);
8743           if (!NILP (char_table_ref (work_table, c)))
8744             /* This character was already checked.  Ignore it.  */
8745             continue;
8746
8747           charset_map_loaded = 0;
8748           for (tail = coding_attrs_list; CONSP (tail);)
8749             {
8750               elt = XCAR (tail);
8751               if (NILP (elt))
8752                 tail = XCDR (tail);
8753               else if (char_encodable_p (c, elt))
8754                 tail = XCDR (tail);
8755               else if (CONSP (XCDR (tail)))
8756                 {
8757                   XSETCAR (tail, XCAR (XCDR (tail)));
8758                   XSETCDR (tail, XCDR (XCDR (tail)));
8759                 }
8760               else
8761                 {
8762                   XSETCAR (tail, Qnil);
8763                   tail = XCDR (tail);
8764                 }
8765             }
8766           if (charset_map_loaded)
8767             {
8768               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8769
8770               if (STRINGP (start))
8771                 pbeg = SDATA (start);
8772               else
8773                 pbeg = BYTE_POS_ADDR (start_byte);
8774               p = pbeg + p_offset;
8775               pend = pbeg + pend_offset;
8776             }
8777           char_table_set (work_table, c, Qt);
8778         }
8779     }
8780
8781   safe_codings = list2 (Qraw_text, Qno_conversion);
8782   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8783     if (! NILP (XCAR (tail)))
8784       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8785
8786   return safe_codings;
8787 }
8788
8789
8790 DEFUN ("unencodable-char-position", Funencodable_char_position,
8791        Sunencodable_char_position, 3, 5, 0,
8792        doc: /*
8793 Return position of first un-encodable character in a region.
8794 START and END specify the region and CODING-SYSTEM specifies the
8795 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8796
8797 If optional 4th argument COUNT is non-nil, it specifies at most how
8798 many un-encodable characters to search.  In this case, the value is a
8799 list of positions.
8800
8801 If optional 5th argument STRING is non-nil, it is a string to search
8802 for un-encodable characters.  In that case, START and END are indexes
8803 to the string.  */)
8804      (start, end, coding_system, count, string)
8805      Lisp_Object start, end, coding_system, count, string;
8806 {
8807   int n;
8808   struct coding_system coding;
8809   Lisp_Object attrs, charset_list, translation_table;
8810   Lisp_Object positions;
8811   int from, to;
8812   const unsigned char *p, *stop, *pend;
8813   int ascii_compatible;
8814
8815   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8816   attrs = CODING_ID_ATTRS (coding.id);
8817   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8818     return Qnil;
8819   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8820   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8821   translation_table = get_translation_table (attrs, 1, NULL);
8822
8823   if (NILP (string))
8824     {
8825       validate_region (&start, &end);
8826       from = XINT (start);
8827       to = XINT (end);
8828       if (NILP (current_buffer->enable_multibyte_characters)
8829           || (ascii_compatible
8830               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8831         return Qnil;
8832       p = CHAR_POS_ADDR (from);
8833       pend = CHAR_POS_ADDR (to);
8834       if (from < GPT && to >= GPT)
8835         stop = GPT_ADDR;
8836       else
8837         stop = pend;
8838     }
8839   else
8840     {
8841       CHECK_STRING (string);
8842       CHECK_NATNUM (start);
8843       CHECK_NATNUM (end);
8844       from = XINT (start);
8845       to = XINT (end);
8846       if (from > to
8847           || to > SCHARS (string))
8848         args_out_of_range_3 (string, start, end);
8849       if (! STRING_MULTIBYTE (string))
8850         return Qnil;
8851       p = SDATA (string) + string_char_to_byte (string, from);
8852       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8853       if (ascii_compatible && (to - from) == (pend - p))
8854         return Qnil;
8855     }
8856
8857   if (NILP (count))
8858     n = 1;
8859   else
8860     {
8861       CHECK_NATNUM (count);
8862       n = XINT (count);
8863     }
8864
8865   positions = Qnil;
8866   while (1)
8867     {
8868       int c;
8869
8870       if (ascii_compatible)
8871         while (p < stop && ASCII_BYTE_P (*p))
8872           p++, from++;
8873       if (p >= stop)
8874         {
8875           if (p >= pend)
8876             break;
8877           stop = pend;
8878           p = GAP_END_ADDR;
8879         }
8880
8881       c = STRING_CHAR_ADVANCE (p);
8882       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8883           && ! char_charset (translate_char (translation_table, c),
8884                              charset_list, NULL))
8885         {
8886           positions = Fcons (make_number (from), positions);
8887           n--;
8888           if (n == 0)
8889             break;
8890         }
8891
8892       from++;
8893     }
8894
8895   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8896 }
8897
8898
8899 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8900        Scheck_coding_systems_region, 3, 3, 0,
8901        doc: /* Check if the region is encodable by coding systems.
8902
8903 START and END are buffer positions specifying the region.
8904 CODING-SYSTEM-LIST is a list of coding systems to check.
8905
8906 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8907 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8908 whole region, POS0, POS1, ... are buffer positions where non-encodable
8909 characters are found.
8910
8911 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8912 value is nil.
8913
8914 START may be a string.  In that case, check if the string is
8915 encodable, and the value contains indices to the string instead of
8916 buffer positions.  END is ignored.
8917
8918 If the current buffer (or START if it is a string) is unibyte, the value
8919 is nil.  */)
8920      (start, end, coding_system_list)
8921      Lisp_Object start, end, coding_system_list;
8922 {
8923   Lisp_Object list;
8924   EMACS_INT start_byte, end_byte;
8925   int pos;
8926   const unsigned char *p, *pbeg, *pend;
8927   int c;
8928   Lisp_Object tail, elt, attrs;
8929
8930   if (STRINGP (start))
8931     {
8932       if (!STRING_MULTIBYTE (start)
8933           || SCHARS (start) == SBYTES (start))
8934         return Qnil;
8935       start_byte = 0;
8936       end_byte = SBYTES (start);
8937       pos = 0;
8938     }
8939   else
8940     {
8941       CHECK_NUMBER_COERCE_MARKER (start);
8942       CHECK_NUMBER_COERCE_MARKER (end);
8943       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8944         args_out_of_range (start, end);
8945       if (NILP (current_buffer->enable_multibyte_characters))
8946         return Qnil;
8947       start_byte = CHAR_TO_BYTE (XINT (start));
8948       end_byte = CHAR_TO_BYTE (XINT (end));
8949       if (XINT (end) - XINT (start) == end_byte - start_byte)
8950         return Qnil;
8951
8952       if (XINT (start) < GPT && XINT (end) > GPT)
8953         {
8954           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8955             move_gap_both (XINT (start), start_byte);
8956           else
8957             move_gap_both (XINT (end), end_byte);
8958         }
8959       pos = XINT (start);
8960     }
8961
8962   list = Qnil;
8963   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8964     {
8965       elt = XCAR (tail);
8966       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8967       ASET (attrs, coding_attr_trans_tbl,
8968             get_translation_table (attrs, 1, NULL));
8969       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8970     }
8971
8972   if (STRINGP (start))
8973     p = pbeg = SDATA (start);
8974   else
8975     p = pbeg = BYTE_POS_ADDR (start_byte);
8976   pend = p + (end_byte - start_byte);
8977
8978   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8979   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8980
8981   while (p < pend)
8982     {
8983       if (ASCII_BYTE_P (*p))
8984         p++;
8985       else
8986         {
8987           c = STRING_CHAR_ADVANCE (p);
8988
8989           charset_map_loaded = 0;
8990           for (tail = list; CONSP (tail); tail = XCDR (tail))
8991             {
8992               elt = XCDR (XCAR (tail));
8993               if (! char_encodable_p (c, XCAR (elt)))
8994                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8995             }
8996           if (charset_map_loaded)
8997             {
8998               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8999
9000               if (STRINGP (start))
9001                 pbeg = SDATA (start);
9002               else
9003                 pbeg = BYTE_POS_ADDR (start_byte);
9004               p = pbeg + p_offset;
9005               pend = pbeg + pend_offset;
9006             }
9007         }
9008       pos++;
9009     }
9010
9011   tail = list;
9012   list = Qnil;
9013   for (; CONSP (tail); tail = XCDR (tail))
9014     {
9015       elt = XCAR (tail);
9016       if (CONSP (XCDR (XCDR (elt))))
9017         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
9018                       list);
9019     }
9020
9021   return list;
9022 }
9023
9024
9025 Lisp_Object
9026 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
9027      Lisp_Object start, end, coding_system, dst_object;
9028      int encodep, norecord;
9029 {
9030   struct coding_system coding;
9031   EMACS_INT from, from_byte, to, to_byte;
9032   Lisp_Object src_object;
9033
9034   CHECK_NUMBER_COERCE_MARKER (start);
9035   CHECK_NUMBER_COERCE_MARKER (end);
9036   if (NILP (coding_system))
9037     coding_system = Qno_conversion;
9038   else
9039     CHECK_CODING_SYSTEM (coding_system);
9040   src_object = Fcurrent_buffer ();
9041   if (NILP (dst_object))
9042     dst_object = src_object;
9043   else if (! EQ (dst_object, Qt))
9044     CHECK_BUFFER (dst_object);
9045
9046   validate_region (&start, &end);
9047   from = XFASTINT (start);
9048   from_byte = CHAR_TO_BYTE (from);
9049   to = XFASTINT (end);
9050   to_byte = CHAR_TO_BYTE (to);
9051
9052   setup_coding_system (coding_system, &coding);
9053   coding.mode |= CODING_MODE_LAST_BLOCK;
9054
9055   if (encodep)
9056     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9057                           dst_object);
9058   else
9059     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9060                           dst_object);
9061   if (! norecord)
9062     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9063
9064   return (BUFFERP (dst_object)
9065           ? make_number (coding.produced_char)
9066           : coding.dst_object);
9067 }
9068
9069
9070 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
9071        3, 4, "r\nzCoding system: ",
9072        doc: /* Decode the current region from the specified coding system.
9073 When called from a program, takes four arguments:
9074         START, END, CODING-SYSTEM, and DESTINATION.
9075 START and END are buffer positions.
9076
9077 Optional 4th arguments DESTINATION specifies where the decoded text goes.
9078 If nil, the region between START and END is replaced by the decoded text.
9079 If buffer, the decoded text is inserted in that buffer after point (point
9080 does not move).
9081 In those cases, the length of the decoded text is returned.
9082 If DESTINATION is t, the decoded text is returned.
9083
9084 This function sets `last-coding-system-used' to the precise coding system
9085 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9086 not fully specified.)  */)
9087      (start, end, coding_system, destination)
9088      Lisp_Object start, end, coding_system, destination;
9089 {
9090   return code_convert_region (start, end, coding_system, destination, 0, 0);
9091 }
9092
9093 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
9094        3, 4, "r\nzCoding system: ",
9095        doc: /* Encode the current region by specified coding system.
9096 When called from a program, takes four arguments:
9097         START, END, CODING-SYSTEM and DESTINATION.
9098 START and END are buffer positions.
9099
9100 Optional 4th arguments DESTINATION specifies where the encoded text goes.
9101 If nil, the region between START and END is replace by the encoded text.
9102 If buffer, the encoded text is inserted in that buffer after point (point
9103 does not move).
9104 In those cases, the length of the encoded text is returned.
9105 If DESTINATION is t, the encoded text is returned.
9106
9107 This function sets `last-coding-system-used' to the precise coding system
9108 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9109 not fully specified.)  */)
9110   (start, end, coding_system, destination)
9111      Lisp_Object start, end, coding_system, destination;
9112 {
9113   return code_convert_region (start, end, coding_system, destination, 1, 0);
9114 }
9115
9116 Lisp_Object
9117 code_convert_string (string, coding_system, dst_object,
9118                      encodep, nocopy, norecord)
9119      Lisp_Object string, coding_system, dst_object;
9120      int encodep, nocopy, norecord;
9121 {
9122   struct coding_system coding;
9123   EMACS_INT chars, bytes;
9124
9125   CHECK_STRING (string);
9126   if (NILP (coding_system))
9127     {
9128       if (! norecord)
9129         Vlast_coding_system_used = Qno_conversion;
9130       if (NILP (dst_object))
9131         return (nocopy ? Fcopy_sequence (string) : string);
9132     }
9133
9134   if (NILP (coding_system))
9135     coding_system = Qno_conversion;
9136   else
9137     CHECK_CODING_SYSTEM (coding_system);
9138   if (NILP (dst_object))
9139     dst_object = Qt;
9140   else if (! EQ (dst_object, Qt))
9141     CHECK_BUFFER (dst_object);
9142
9143   setup_coding_system (coding_system, &coding);
9144   coding.mode |= CODING_MODE_LAST_BLOCK;
9145   chars = SCHARS (string);
9146   bytes = SBYTES (string);
9147   if (encodep)
9148     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9149   else
9150     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9151   if (! norecord)
9152     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9153
9154   return (BUFFERP (dst_object)
9155           ? make_number (coding.produced_char)
9156           : coding.dst_object);
9157 }
9158
9159
9160 /* Encode or decode STRING according to CODING_SYSTEM.
9161    Do not set Vlast_coding_system_used.
9162
9163    This function is called only from macros DECODE_FILE and
9164    ENCODE_FILE, thus we ignore character composition.  */
9165
9166 Lisp_Object
9167 code_convert_string_norecord (string, coding_system, encodep)
9168      Lisp_Object string, coding_system;
9169      int encodep;
9170 {
9171   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9172 }
9173
9174
9175 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9176        2, 4, 0,
9177        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9178
9179 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9180 if the decoding operation is trivial.
9181
9182 Optional fourth arg BUFFER non-nil means that the decoded text is
9183 inserted in that buffer after point (point does not move).  In this
9184 case, the return value is the length of the decoded text.
9185
9186 This function sets `last-coding-system-used' to the precise coding system
9187 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9188 not fully specified.)  */)
9189   (string, coding_system, nocopy, buffer)
9190      Lisp_Object string, coding_system, nocopy, buffer;
9191 {
9192   return code_convert_string (string, coding_system, buffer,
9193                               0, ! NILP (nocopy), 0);
9194 }
9195
9196 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9197        2, 4, 0,
9198        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9199
9200 Optional third arg NOCOPY non-nil means it is OK to return STRING
9201 itself if the encoding operation is trivial.
9202
9203 Optional fourth arg BUFFER non-nil means that the encoded text is
9204 inserted in that buffer after point (point does not move).  In this
9205 case, the return value is the length of the encoded text.
9206
9207 This function sets `last-coding-system-used' to the precise coding system
9208 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9209 not fully specified.)  */)
9210      (string, coding_system, nocopy, buffer)
9211      Lisp_Object string, coding_system, nocopy, buffer;
9212 {
9213   return code_convert_string (string, coding_system, buffer,
9214                               1, ! NILP (nocopy), 1);
9215 }
9216
9217 \f
9218 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9219        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9220 Return the corresponding character.  */)
9221      (code)
9222      Lisp_Object code;
9223 {
9224   Lisp_Object spec, attrs, val;
9225   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9226   int c;
9227
9228   CHECK_NATNUM (code);
9229   c = XFASTINT (code);
9230   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9231   attrs = AREF (spec, 0);
9232
9233   if (ASCII_BYTE_P (c)
9234       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9235     return code;
9236
9237   val = CODING_ATTR_CHARSET_LIST (attrs);
9238   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9239   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9240   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9241
9242   if (c <= 0x7F)
9243     charset = charset_roman;
9244   else if (c >= 0xA0 && c < 0xDF)
9245     {
9246       charset = charset_kana;
9247       c -= 0x80;
9248     }
9249   else
9250     {
9251       int s1 = c >> 8, s2 = c & 0xFF;
9252
9253       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9254           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9255         error ("Invalid code: %d", code);
9256       SJIS_TO_JIS (c);
9257       charset = charset_kanji;
9258     }
9259   c = DECODE_CHAR (charset, c);
9260   if (c < 0)
9261     error ("Invalid code: %d", code);
9262   return make_number (c);
9263 }
9264
9265
9266 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9267        doc: /* Encode a Japanese character CH to shift_jis encoding.
9268 Return the corresponding code in SJIS.  */)
9269      (ch)
9270     Lisp_Object ch;
9271 {
9272   Lisp_Object spec, attrs, charset_list;
9273   int c;
9274   struct charset *charset;
9275   unsigned code;
9276
9277   CHECK_CHARACTER (ch);
9278   c = XFASTINT (ch);
9279   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9280   attrs = AREF (spec, 0);
9281
9282   if (ASCII_CHAR_P (c)
9283       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9284     return ch;
9285
9286   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9287   charset = char_charset (c, charset_list, &code);
9288   if (code == CHARSET_INVALID_CODE (charset))
9289     error ("Can't encode by shift_jis encoding: %d", c);
9290   JIS_TO_SJIS (code);
9291
9292   return make_number (code);
9293 }
9294
9295 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9296        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9297 Return the corresponding character.  */)
9298      (code)
9299      Lisp_Object code;
9300 {
9301   Lisp_Object spec, attrs, val;
9302   struct charset *charset_roman, *charset_big5, *charset;
9303   int c;
9304
9305   CHECK_NATNUM (code);
9306   c = XFASTINT (code);
9307   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9308   attrs = AREF (spec, 0);
9309
9310   if (ASCII_BYTE_P (c)
9311       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9312     return code;
9313
9314   val = CODING_ATTR_CHARSET_LIST (attrs);
9315   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9316   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9317
9318   if (c <= 0x7F)
9319     charset = charset_roman;
9320   else
9321     {
9322       int b1 = c >> 8, b2 = c & 0x7F;
9323       if (b1 < 0xA1 || b1 > 0xFE
9324           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9325         error ("Invalid code: %d", code);
9326       charset = charset_big5;
9327     }
9328   c = DECODE_CHAR (charset, (unsigned )c);
9329   if (c < 0)
9330     error ("Invalid code: %d", code);
9331   return make_number (c);
9332 }
9333
9334 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9335        doc: /* Encode the Big5 character CH to BIG5 coding system.
9336 Return the corresponding character code in Big5.  */)
9337      (ch)
9338      Lisp_Object ch;
9339 {
9340   Lisp_Object spec, attrs, charset_list;
9341   struct charset *charset;
9342   int c;
9343   unsigned code;
9344
9345   CHECK_CHARACTER (ch);
9346   c = XFASTINT (ch);
9347   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9348   attrs = AREF (spec, 0);
9349   if (ASCII_CHAR_P (c)
9350       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9351     return ch;
9352
9353   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9354   charset = char_charset (c, charset_list, &code);
9355   if (code == CHARSET_INVALID_CODE (charset))
9356     error ("Can't encode by Big5 encoding: %d", c);
9357
9358   return make_number (code);
9359 }
9360
9361 \f
9362 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9363        Sset_terminal_coding_system_internal, 1, 2, 0,
9364        doc: /* Internal use only.  */)
9365      (coding_system, terminal)
9366      Lisp_Object coding_system;
9367      Lisp_Object terminal;
9368 {
9369   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9370   CHECK_SYMBOL (coding_system);
9371   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9372   /* We had better not send unsafe characters to terminal.  */
9373   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9374   /* Characer composition should be disabled.  */
9375   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9376   terminal_coding->src_multibyte = 1;
9377   terminal_coding->dst_multibyte = 0;
9378   return Qnil;
9379 }
9380
9381 DEFUN ("set-safe-terminal-coding-system-internal",
9382        Fset_safe_terminal_coding_system_internal,
9383        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9384        doc: /* Internal use only.  */)
9385      (coding_system)
9386      Lisp_Object coding_system;
9387 {
9388   CHECK_SYMBOL (coding_system);
9389   setup_coding_system (Fcheck_coding_system (coding_system),
9390                        &safe_terminal_coding);
9391   /* Characer composition should be disabled.  */
9392   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9393   safe_terminal_coding.src_multibyte = 1;
9394   safe_terminal_coding.dst_multibyte = 0;
9395   return Qnil;
9396 }
9397
9398 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9399        Sterminal_coding_system, 0, 1, 0,
9400        doc: /* Return coding system specified for terminal output on the given terminal.
9401 TERMINAL may be a terminal object, a frame, or nil for the selected
9402 frame's terminal device.  */)
9403      (terminal)
9404      Lisp_Object terminal;
9405 {
9406   struct coding_system *terminal_coding
9407     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9408   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9409
9410   /* For backward compatibility, return nil if it is `undecided'. */
9411   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9412 }
9413
9414 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9415        Sset_keyboard_coding_system_internal, 1, 2, 0,
9416        doc: /* Internal use only.  */)
9417      (coding_system, terminal)
9418      Lisp_Object coding_system;
9419      Lisp_Object terminal;
9420 {
9421   struct terminal *t = get_terminal (terminal, 1);
9422   CHECK_SYMBOL (coding_system);
9423   if (NILP (coding_system))
9424     coding_system = Qno_conversion;
9425   else
9426     Fcheck_coding_system (coding_system);
9427   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9428   /* Characer composition should be disabled.  */
9429   TERMINAL_KEYBOARD_CODING (t)->common_flags
9430     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9431   return Qnil;
9432 }
9433
9434 DEFUN ("keyboard-coding-system",
9435        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9436        doc: /* Return coding system specified for decoding keyboard input.  */)
9437      (terminal)
9438      Lisp_Object terminal;
9439 {
9440   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9441                          (get_terminal (terminal, 1))->id);
9442 }
9443
9444 \f
9445 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9446        Sfind_operation_coding_system,  1, MANY, 0,
9447        doc: /* Choose a coding system for an operation based on the target name.
9448 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9449 DECODING-SYSTEM is the coding system to use for decoding
9450 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9451 for encoding (in case OPERATION does encoding).
9452
9453 The first argument OPERATION specifies an I/O primitive:
9454   For file I/O, `insert-file-contents' or `write-region'.
9455   For process I/O, `call-process', `call-process-region', or `start-process'.
9456   For network I/O, `open-network-stream'.
9457
9458 The remaining arguments should be the same arguments that were passed
9459 to the primitive.  Depending on which primitive, one of those arguments
9460 is selected as the TARGET.  For example, if OPERATION does file I/O,
9461 whichever argument specifies the file name is TARGET.
9462
9463 TARGET has a meaning which depends on OPERATION:
9464   For file I/O, TARGET is a file name (except for the special case below).
9465   For process I/O, TARGET is a process name.
9466   For network I/O, TARGET is a service name or a port number.
9467
9468 This function looks up what is specified for TARGET in
9469 `file-coding-system-alist', `process-coding-system-alist',
9470 or `network-coding-system-alist' depending on OPERATION.
9471 They may specify a coding system, a cons of coding systems,
9472 or a function symbol to call.
9473 In the last case, we call the function with one argument,
9474 which is a list of all the arguments given to this function.
9475 If the function can't decide a coding system, it can return
9476 `undecided' so that the normal code-detection is performed.
9477
9478 If OPERATION is `insert-file-contents', the argument corresponding to
9479 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9480 file name to look up, and BUFFER is a buffer that contains the file's
9481 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9482 function to call for FILENAME, that function should examine the
9483 contents of BUFFER instead of reading the file.
9484
9485 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9486      (nargs, args)
9487      int nargs;
9488      Lisp_Object *args;
9489 {
9490   Lisp_Object operation, target_idx, target, val;
9491   register Lisp_Object chain;
9492
9493   if (nargs < 2)
9494     error ("Too few arguments");
9495   operation = args[0];
9496   if (!SYMBOLP (operation)
9497       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9498     error ("Invalid first argument");
9499   if (nargs < 1 + XINT (target_idx))
9500     error ("Too few arguments for operation: %s",
9501            SDATA (SYMBOL_NAME (operation)));
9502   target = args[XINT (target_idx) + 1];
9503   if (!(STRINGP (target)
9504         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9505             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9506         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9507     error ("Invalid %dth argument", XINT (target_idx) + 1);
9508   if (CONSP (target))
9509     target = XCAR (target);
9510
9511   chain = ((EQ (operation, Qinsert_file_contents)
9512             || EQ (operation, Qwrite_region))
9513            ? Vfile_coding_system_alist
9514            : (EQ (operation, Qopen_network_stream)
9515               ? Vnetwork_coding_system_alist
9516               : Vprocess_coding_system_alist));
9517   if (NILP (chain))
9518     return Qnil;
9519
9520   for (; CONSP (chain); chain = XCDR (chain))
9521     {
9522       Lisp_Object elt;
9523
9524       elt = XCAR (chain);
9525       if (CONSP (elt)
9526           && ((STRINGP (target)
9527                && STRINGP (XCAR (elt))
9528                && fast_string_match (XCAR (elt), target) >= 0)
9529               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9530         {
9531           val = XCDR (elt);
9532           /* Here, if VAL is both a valid coding system and a valid
9533              function symbol, we return VAL as a coding system.  */
9534           if (CONSP (val))
9535             return val;
9536           if (! SYMBOLP (val))
9537             return Qnil;
9538           if (! NILP (Fcoding_system_p (val)))
9539             return Fcons (val, val);
9540           if (! NILP (Ffboundp (val)))
9541             {
9542               /* We use call1 rather than safe_call1
9543                  so as to get bug reports about functions called here
9544                  which don't handle the current interface.  */
9545               val = call1 (val, Flist (nargs, args));
9546               if (CONSP (val))
9547                 return val;
9548               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9549                 return Fcons (val, val);
9550             }
9551           return Qnil;
9552         }
9553     }
9554   return Qnil;
9555 }
9556
9557 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9558        Sset_coding_system_priority, 0, MANY, 0,
9559        doc: /* Assign higher priority to the coding systems given as arguments.
9560 If multiple coding systems belong to the same category,
9561 all but the first one are ignored.
9562
9563 usage: (set-coding-system-priority &rest coding-systems)  */)
9564      (nargs, args)
9565      int nargs;
9566      Lisp_Object *args;
9567 {
9568   int i, j;
9569   int changed[coding_category_max];
9570   enum coding_category priorities[coding_category_max];
9571
9572   bzero (changed, sizeof changed);
9573
9574   for (i = j = 0; i < nargs; i++)
9575     {
9576       enum coding_category category;
9577       Lisp_Object spec, attrs;
9578
9579       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9580       attrs = AREF (spec, 0);
9581       category = XINT (CODING_ATTR_CATEGORY (attrs));
9582       if (changed[category])
9583         /* Ignore this coding system because a coding system of the
9584            same category already had a higher priority.  */
9585         continue;
9586       changed[category] = 1;
9587       priorities[j++] = category;
9588       if (coding_categories[category].id >= 0
9589           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9590         setup_coding_system (args[i], &coding_categories[category]);
9591       Fset (AREF (Vcoding_category_table, category), args[i]);
9592     }
9593
9594   /* Now we have decided top J priorities.  Reflect the order of the
9595      original priorities to the remaining priorities.  */
9596
9597   for (i = j, j = 0; i < coding_category_max; i++, j++)
9598     {
9599       while (j < coding_category_max
9600              && changed[coding_priorities[j]])
9601         j++;
9602       if (j == coding_category_max)
9603         abort ();
9604       priorities[i] = coding_priorities[j];
9605     }
9606
9607   bcopy (priorities, coding_priorities, sizeof priorities);
9608
9609   /* Update `coding-category-list'.  */
9610   Vcoding_category_list = Qnil;
9611   for (i = coding_category_max - 1; i >= 0; i--)
9612     Vcoding_category_list
9613       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9614                Vcoding_category_list);
9615
9616   return Qnil;
9617 }
9618
9619 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9620        Scoding_system_priority_list, 0, 1, 0,
9621        doc: /* Return a list of coding systems ordered by their priorities.
9622 The list contains a subset of coding systems; i.e. coding systems
9623 assigned to each coding category (see `coding-category-list').
9624
9625 HIGHESTP non-nil means just return the highest priority one.  */)
9626      (highestp)
9627      Lisp_Object highestp;
9628 {
9629   int i;
9630   Lisp_Object val;
9631
9632   for (i = 0, val = Qnil; i < coding_category_max; i++)
9633     {
9634       enum coding_category category = coding_priorities[i];
9635       int id = coding_categories[category].id;
9636       Lisp_Object attrs;
9637
9638       if (id < 0)
9639         continue;
9640       attrs = CODING_ID_ATTRS (id);
9641       if (! NILP (highestp))
9642         return CODING_ATTR_BASE_NAME (attrs);
9643       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9644     }
9645   return Fnreverse (val);
9646 }
9647
9648 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9649
9650 static Lisp_Object
9651 make_subsidiaries (base)
9652      Lisp_Object base;
9653 {
9654   Lisp_Object subsidiaries;
9655   int base_name_len = SBYTES (SYMBOL_NAME (base));
9656   char *buf = (char *) alloca (base_name_len + 6);
9657   int i;
9658
9659   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9660   subsidiaries = Fmake_vector (make_number (3), Qnil);
9661   for (i = 0; i < 3; i++)
9662     {
9663       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9664       ASET (subsidiaries, i, intern (buf));
9665     }
9666   return subsidiaries;
9667 }
9668
9669
9670 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9671        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9672        doc: /* For internal use only.
9673 usage: (define-coding-system-internal ...)  */)
9674      (nargs, args)
9675      int nargs;
9676      Lisp_Object *args;
9677 {
9678   Lisp_Object name;
9679   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9680   Lisp_Object attrs;            /* Vector of attributes.  */
9681   Lisp_Object eol_type;
9682   Lisp_Object aliases;
9683   Lisp_Object coding_type, charset_list, safe_charsets;
9684   enum coding_category category;
9685   Lisp_Object tail, val;
9686   int max_charset_id = 0;
9687   int i;
9688
9689   if (nargs < coding_arg_max)
9690     goto short_args;
9691
9692   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9693
9694   name = args[coding_arg_name];
9695   CHECK_SYMBOL (name);
9696   CODING_ATTR_BASE_NAME (attrs) = name;
9697
9698   val = args[coding_arg_mnemonic];
9699   if (! STRINGP (val))
9700     CHECK_CHARACTER (val);
9701   CODING_ATTR_MNEMONIC (attrs) = val;
9702
9703   coding_type = args[coding_arg_coding_type];
9704   CHECK_SYMBOL (coding_type);
9705   CODING_ATTR_TYPE (attrs) = coding_type;
9706
9707   charset_list = args[coding_arg_charset_list];
9708   if (SYMBOLP (charset_list))
9709     {
9710       if (EQ (charset_list, Qiso_2022))
9711         {
9712           if (! EQ (coding_type, Qiso_2022))
9713             error ("Invalid charset-list");
9714           charset_list = Viso_2022_charset_list;
9715         }
9716       else if (EQ (charset_list, Qemacs_mule))
9717         {
9718           if (! EQ (coding_type, Qemacs_mule))
9719             error ("Invalid charset-list");
9720           charset_list = Vemacs_mule_charset_list;
9721         }
9722       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9723         if (max_charset_id < XFASTINT (XCAR (tail)))
9724           max_charset_id = XFASTINT (XCAR (tail));
9725     }
9726   else
9727     {
9728       charset_list = Fcopy_sequence (charset_list);
9729       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9730         {
9731           struct charset *charset;
9732
9733           val = XCAR (tail);
9734           CHECK_CHARSET_GET_CHARSET (val, charset);
9735           if (EQ (coding_type, Qiso_2022)
9736               ? CHARSET_ISO_FINAL (charset) < 0
9737               : EQ (coding_type, Qemacs_mule)
9738               ? CHARSET_EMACS_MULE_ID (charset) < 0
9739               : 0)
9740             error ("Can't handle charset `%s'",
9741                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9742
9743           XSETCAR (tail, make_number (charset->id));
9744           if (max_charset_id < charset->id)
9745             max_charset_id = charset->id;
9746         }
9747     }
9748   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9749
9750   safe_charsets = make_uninit_string (max_charset_id + 1);
9751   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9752   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9753     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9754   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9755
9756   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9757
9758   val = args[coding_arg_decode_translation_table];
9759   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9760     CHECK_SYMBOL (val);
9761   CODING_ATTR_DECODE_TBL (attrs) = val;
9762
9763   val = args[coding_arg_encode_translation_table];
9764   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9765     CHECK_SYMBOL (val);
9766   CODING_ATTR_ENCODE_TBL (attrs) = val;
9767
9768   val = args[coding_arg_post_read_conversion];
9769   CHECK_SYMBOL (val);
9770   CODING_ATTR_POST_READ (attrs) = val;
9771
9772   val = args[coding_arg_pre_write_conversion];
9773   CHECK_SYMBOL (val);
9774   CODING_ATTR_PRE_WRITE (attrs) = val;
9775
9776   val = args[coding_arg_default_char];
9777   if (NILP (val))
9778     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9779   else
9780     {
9781       CHECK_CHARACTER (val);
9782       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9783     }
9784
9785   val = args[coding_arg_for_unibyte];
9786   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9787
9788   val = args[coding_arg_plist];
9789   CHECK_LIST (val);
9790   CODING_ATTR_PLIST (attrs) = val;
9791
9792   if (EQ (coding_type, Qcharset))
9793     {
9794       /* Generate a lisp vector of 256 elements.  Each element is nil,
9795          integer, or a list of charset IDs.
9796
9797          If Nth element is nil, the byte code N is invalid in this
9798          coding system.
9799
9800          If Nth element is a number NUM, N is the first byte of a
9801          charset whose ID is NUM.
9802
9803          If Nth element is a list of charset IDs, N is the first byte
9804          of one of them.  The list is sorted by dimensions of the
9805          charsets.  A charset of smaller dimension comes firtst. */
9806       val = Fmake_vector (make_number (256), Qnil);
9807
9808       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9809         {
9810           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9811           int dim = CHARSET_DIMENSION (charset);
9812           int idx = (dim - 1) * 4;
9813
9814           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9815             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9816
9817           for (i = charset->code_space[idx];
9818                i <= charset->code_space[idx + 1]; i++)
9819             {
9820               Lisp_Object tmp, tmp2;
9821               int dim2;
9822
9823               tmp = AREF (val, i);
9824               if (NILP (tmp))
9825                 tmp = XCAR (tail);
9826               else if (NUMBERP (tmp))
9827                 {
9828                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9829                   if (dim < dim2)
9830                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9831                   else
9832                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9833                 }
9834               else
9835                 {
9836                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9837                     {
9838                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9839                       if (dim < dim2)
9840                         break;
9841                     }
9842                   if (NILP (tmp2))
9843                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9844                   else
9845                     {
9846                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9847                       XSETCAR (tmp2, XCAR (tail));
9848                     }
9849                 }
9850               ASET (val, i, tmp);
9851             }
9852         }
9853       ASET (attrs, coding_attr_charset_valids, val);
9854       category = coding_category_charset;
9855     }
9856   else if (EQ (coding_type, Qccl))
9857     {
9858       Lisp_Object valids;
9859
9860       if (nargs < coding_arg_ccl_max)
9861         goto short_args;
9862
9863       val = args[coding_arg_ccl_decoder];
9864       CHECK_CCL_PROGRAM (val);
9865       if (VECTORP (val))
9866         val = Fcopy_sequence (val);
9867       ASET (attrs, coding_attr_ccl_decoder, val);
9868
9869       val = args[coding_arg_ccl_encoder];
9870       CHECK_CCL_PROGRAM (val);
9871       if (VECTORP (val))
9872         val = Fcopy_sequence (val);
9873       ASET (attrs, coding_attr_ccl_encoder, val);
9874
9875       val = args[coding_arg_ccl_valids];
9876       valids = Fmake_string (make_number (256), make_number (0));
9877       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9878         {
9879           int from, to;
9880
9881           val = Fcar (tail);
9882           if (INTEGERP (val))
9883             {
9884               from = to = XINT (val);
9885               if (from < 0 || from > 255)
9886                 args_out_of_range_3 (val, make_number (0), make_number (255));
9887             }
9888           else
9889             {
9890               CHECK_CONS (val);
9891               CHECK_NATNUM_CAR (val);
9892               CHECK_NATNUM_CDR (val);
9893               from = XINT (XCAR (val));
9894               if (from > 255)
9895                 args_out_of_range_3 (XCAR (val),
9896                                      make_number (0), make_number (255));
9897               to = XINT (XCDR (val));
9898               if (to < from || to > 255)
9899                 args_out_of_range_3 (XCDR (val),
9900                                      XCAR (val), make_number (255));
9901             }
9902           for (i = from; i <= to; i++)
9903             SSET (valids, i, 1);
9904         }
9905       ASET (attrs, coding_attr_ccl_valids, valids);
9906
9907       category = coding_category_ccl;
9908     }
9909   else if (EQ (coding_type, Qutf_16))
9910     {
9911       Lisp_Object bom, endian;
9912
9913       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9914
9915       if (nargs < coding_arg_utf16_max)
9916         goto short_args;
9917
9918       bom = args[coding_arg_utf16_bom];
9919       if (! NILP (bom) && ! EQ (bom, Qt))
9920         {
9921           CHECK_CONS (bom);
9922           val = XCAR (bom);
9923           CHECK_CODING_SYSTEM (val);
9924           val = XCDR (bom);
9925           CHECK_CODING_SYSTEM (val);
9926         }
9927       ASET (attrs, coding_attr_utf_bom, bom);
9928
9929       endian = args[coding_arg_utf16_endian];
9930       CHECK_SYMBOL (endian);
9931       if (NILP (endian))
9932         endian = Qbig;
9933       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9934         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9935       ASET (attrs, coding_attr_utf_16_endian, endian);
9936
9937       category = (CONSP (bom)
9938                   ? coding_category_utf_16_auto
9939                   : NILP (bom)
9940                   ? (EQ (endian, Qbig)
9941                      ? coding_category_utf_16_be_nosig
9942                      : coding_category_utf_16_le_nosig)
9943                   : (EQ (endian, Qbig)
9944                      ? coding_category_utf_16_be
9945                      : coding_category_utf_16_le));
9946     }
9947   else if (EQ (coding_type, Qiso_2022))
9948     {
9949       Lisp_Object initial, reg_usage, request, flags;
9950       int i;
9951
9952       if (nargs < coding_arg_iso2022_max)
9953         goto short_args;
9954
9955       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9956       CHECK_VECTOR (initial);
9957       for (i = 0; i < 4; i++)
9958         {
9959           val = Faref (initial, make_number (i));
9960           if (! NILP (val))
9961             {
9962               struct charset *charset;
9963
9964               CHECK_CHARSET_GET_CHARSET (val, charset);
9965               ASET (initial, i, make_number (CHARSET_ID (charset)));
9966               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9967                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9968             }
9969           else
9970             ASET (initial, i, make_number (-1));
9971         }
9972
9973       reg_usage = args[coding_arg_iso2022_reg_usage];
9974       CHECK_CONS (reg_usage);
9975       CHECK_NUMBER_CAR (reg_usage);
9976       CHECK_NUMBER_CDR (reg_usage);
9977
9978       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9979       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9980         {
9981           int id;
9982           Lisp_Object tmp;
9983
9984           val = Fcar (tail);
9985           CHECK_CONS (val);
9986           tmp = XCAR (val);
9987           CHECK_CHARSET_GET_ID (tmp, id);
9988           CHECK_NATNUM_CDR (val);
9989           if (XINT (XCDR (val)) >= 4)
9990             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9991           XSETCAR (val, make_number (id));
9992         }
9993
9994       flags = args[coding_arg_iso2022_flags];
9995       CHECK_NATNUM (flags);
9996       i = XINT (flags);
9997       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9998         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9999
10000       ASET (attrs, coding_attr_iso_initial, initial);
10001       ASET (attrs, coding_attr_iso_usage, reg_usage);
10002       ASET (attrs, coding_attr_iso_request, request);
10003       ASET (attrs, coding_attr_iso_flags, flags);
10004       setup_iso_safe_charsets (attrs);
10005
10006       if (i & CODING_ISO_FLAG_SEVEN_BITS)
10007         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
10008                           | CODING_ISO_FLAG_SINGLE_SHIFT))
10009                     ? coding_category_iso_7_else
10010                     : EQ (args[coding_arg_charset_list], Qiso_2022)
10011                     ? coding_category_iso_7
10012                     : coding_category_iso_7_tight);
10013       else
10014         {
10015           int id = XINT (AREF (initial, 1));
10016
10017           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
10018                        || EQ (args[coding_arg_charset_list], Qiso_2022)
10019                        || id < 0)
10020                       ? coding_category_iso_8_else
10021                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
10022                       ? coding_category_iso_8_1
10023                       : coding_category_iso_8_2);
10024         }
10025       if (category != coding_category_iso_8_1
10026           && category != coding_category_iso_8_2)
10027         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
10028     }
10029   else if (EQ (coding_type, Qemacs_mule))
10030     {
10031       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
10032         ASET (attrs, coding_attr_emacs_mule_full, Qt);
10033       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10034       category = coding_category_emacs_mule;
10035     }
10036   else if (EQ (coding_type, Qshift_jis))
10037     {
10038
10039       struct charset *charset;
10040
10041       if (XINT (Flength (charset_list)) != 3
10042           && XINT (Flength (charset_list)) != 4)
10043         error ("There should be three or four charsets");
10044
10045       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10046       if (CHARSET_DIMENSION (charset) != 1)
10047         error ("Dimension of charset %s is not one",
10048                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10049       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10050         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10051
10052       charset_list = XCDR (charset_list);
10053       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10054       if (CHARSET_DIMENSION (charset) != 1)
10055         error ("Dimension of charset %s is not one",
10056                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10057
10058       charset_list = XCDR (charset_list);
10059       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10060       if (CHARSET_DIMENSION (charset) != 2)
10061         error ("Dimension of charset %s is not two",
10062                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10063
10064       charset_list = XCDR (charset_list);
10065       if (! NILP (charset_list))
10066         {
10067           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10068           if (CHARSET_DIMENSION (charset) != 2)
10069             error ("Dimension of charset %s is not two",
10070                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10071         }
10072
10073       category = coding_category_sjis;
10074       Vsjis_coding_system = name;
10075     }
10076   else if (EQ (coding_type, Qbig5))
10077     {
10078       struct charset *charset;
10079
10080       if (XINT (Flength (charset_list)) != 2)
10081         error ("There should be just two charsets");
10082
10083       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10084       if (CHARSET_DIMENSION (charset) != 1)
10085         error ("Dimension of charset %s is not one",
10086                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10087       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10088         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10089
10090       charset_list = XCDR (charset_list);
10091       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10092       if (CHARSET_DIMENSION (charset) != 2)
10093         error ("Dimension of charset %s is not two",
10094                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10095
10096       category = coding_category_big5;
10097       Vbig5_coding_system = name;
10098     }
10099   else if (EQ (coding_type, Qraw_text))
10100     {
10101       category = coding_category_raw_text;
10102       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10103     }
10104   else if (EQ (coding_type, Qutf_8))
10105     {
10106       Lisp_Object bom;
10107
10108       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10109
10110       if (nargs < coding_arg_utf8_max)
10111         goto short_args;
10112
10113       bom = args[coding_arg_utf8_bom];
10114       if (! NILP (bom) && ! EQ (bom, Qt))
10115         {
10116           CHECK_CONS (bom);
10117           val = XCAR (bom);
10118           CHECK_CODING_SYSTEM (val);
10119           val = XCDR (bom);
10120           CHECK_CODING_SYSTEM (val);
10121         }
10122       ASET (attrs, coding_attr_utf_bom, bom);
10123
10124       category = (CONSP (bom) ? coding_category_utf_8_auto
10125                   : NILP (bom) ? coding_category_utf_8_nosig
10126                   : coding_category_utf_8_sig);
10127     }
10128   else if (EQ (coding_type, Qundecided))
10129     category = coding_category_undecided;
10130   else
10131     error ("Invalid coding system type: %s",
10132            SDATA (SYMBOL_NAME (coding_type)));
10133
10134   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10135   CODING_ATTR_PLIST (attrs)
10136     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10137                                 CODING_ATTR_PLIST (attrs)));
10138   CODING_ATTR_PLIST (attrs)
10139     = Fcons (QCascii_compatible_p,
10140              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10141                     CODING_ATTR_PLIST (attrs)));
10142
10143   eol_type = args[coding_arg_eol_type];
10144   if (! NILP (eol_type)
10145       && ! EQ (eol_type, Qunix)
10146       && ! EQ (eol_type, Qdos)
10147       && ! EQ (eol_type, Qmac))
10148     error ("Invalid eol-type");
10149
10150   aliases = Fcons (name, Qnil);
10151
10152   if (NILP (eol_type))
10153     {
10154       eol_type = make_subsidiaries (name);
10155       for (i = 0; i < 3; i++)
10156         {
10157           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10158
10159           this_name = AREF (eol_type, i);
10160           this_aliases = Fcons (this_name, Qnil);
10161           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10162           this_spec = Fmake_vector (make_number (3), attrs);
10163           ASET (this_spec, 1, this_aliases);
10164           ASET (this_spec, 2, this_eol_type);
10165           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10166           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10167           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10168           if (NILP (val))
10169             Vcoding_system_alist
10170               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10171                        Vcoding_system_alist);
10172         }
10173     }
10174
10175   spec_vec = Fmake_vector (make_number (3), attrs);
10176   ASET (spec_vec, 1, aliases);
10177   ASET (spec_vec, 2, eol_type);
10178
10179   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10180   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10181   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10182   if (NILP (val))
10183     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10184                                   Vcoding_system_alist);
10185
10186   {
10187     int id = coding_categories[category].id;
10188
10189     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10190       setup_coding_system (name, &coding_categories[category]);
10191   }
10192
10193   return Qnil;
10194
10195  short_args:
10196   return Fsignal (Qwrong_number_of_arguments,
10197                   Fcons (intern ("define-coding-system-internal"),
10198                          make_number (nargs)));
10199 }
10200
10201
10202 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10203        3, 3, 0,
10204        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10205   (coding_system, prop, val)
10206      Lisp_Object coding_system, prop, val;
10207 {
10208   Lisp_Object spec, attrs;
10209
10210   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10211   attrs = AREF (spec, 0);
10212   if (EQ (prop, QCmnemonic))
10213     {
10214       if (! STRINGP (val))
10215         CHECK_CHARACTER (val);
10216       CODING_ATTR_MNEMONIC (attrs) = val;
10217     }
10218   else if (EQ (prop, QCdefault_char))
10219     {
10220       if (NILP (val))
10221         val = make_number (' ');
10222       else
10223         CHECK_CHARACTER (val);
10224       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10225     }
10226   else if (EQ (prop, QCdecode_translation_table))
10227     {
10228       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10229         CHECK_SYMBOL (val);
10230       CODING_ATTR_DECODE_TBL (attrs) = val;
10231     }
10232   else if (EQ (prop, QCencode_translation_table))
10233     {
10234       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10235         CHECK_SYMBOL (val);
10236       CODING_ATTR_ENCODE_TBL (attrs) = val;
10237     }
10238   else if (EQ (prop, QCpost_read_conversion))
10239     {
10240       CHECK_SYMBOL (val);
10241       CODING_ATTR_POST_READ (attrs) = val;
10242     }
10243   else if (EQ (prop, QCpre_write_conversion))
10244     {
10245       CHECK_SYMBOL (val);
10246       CODING_ATTR_PRE_WRITE (attrs) = val;
10247     }
10248   else if (EQ (prop, QCascii_compatible_p))
10249     {
10250       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10251     }
10252
10253   CODING_ATTR_PLIST (attrs)
10254     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10255   return val;
10256 }
10257
10258
10259 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10260        Sdefine_coding_system_alias, 2, 2, 0,
10261        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10262      (alias, coding_system)
10263      Lisp_Object alias, coding_system;
10264 {
10265   Lisp_Object spec, aliases, eol_type, val;
10266
10267   CHECK_SYMBOL (alias);
10268   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10269   aliases = AREF (spec, 1);
10270   /* ALIASES should be a list of length more than zero, and the first
10271      element is a base coding system.  Append ALIAS at the tail of the
10272      list.  */
10273   while (!NILP (XCDR (aliases)))
10274     aliases = XCDR (aliases);
10275   XSETCDR (aliases, Fcons (alias, Qnil));
10276
10277   eol_type = AREF (spec, 2);
10278   if (VECTORP (eol_type))
10279     {
10280       Lisp_Object subsidiaries;
10281       int i;
10282
10283       subsidiaries = make_subsidiaries (alias);
10284       for (i = 0; i < 3; i++)
10285         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10286                                      AREF (eol_type, i));
10287     }
10288
10289   Fputhash (alias, spec, Vcoding_system_hash_table);
10290   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10291   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10292   if (NILP (val))
10293     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10294                                   Vcoding_system_alist);
10295
10296   return Qnil;
10297 }
10298
10299 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10300        1, 1, 0,
10301        doc: /* Return the base of CODING-SYSTEM.
10302 Any alias or subsidiary coding system is not a base coding system.  */)
10303   (coding_system)
10304      Lisp_Object coding_system;
10305 {
10306   Lisp_Object spec, attrs;
10307
10308   if (NILP (coding_system))
10309     return (Qno_conversion);
10310   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10311   attrs = AREF (spec, 0);
10312   return CODING_ATTR_BASE_NAME (attrs);
10313 }
10314
10315 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10316        1, 1, 0,
10317        doc: "Return the property list of CODING-SYSTEM.")
10318      (coding_system)
10319      Lisp_Object coding_system;
10320 {
10321   Lisp_Object spec, attrs;
10322
10323   if (NILP (coding_system))
10324     coding_system = Qno_conversion;
10325   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10326   attrs = AREF (spec, 0);
10327   return CODING_ATTR_PLIST (attrs);
10328 }
10329
10330
10331 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10332        1, 1, 0,
10333        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10334      (coding_system)
10335      Lisp_Object coding_system;
10336 {
10337   Lisp_Object spec;
10338
10339   if (NILP (coding_system))
10340     coding_system = Qno_conversion;
10341   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10342   return AREF (spec, 1);
10343 }
10344
10345 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10346        Scoding_system_eol_type, 1, 1, 0,
10347        doc: /* Return eol-type of CODING-SYSTEM.
10348 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10349
10350 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10351 and CR respectively.
10352
10353 A vector value indicates that a format of end-of-line should be
10354 detected automatically.  Nth element of the vector is the subsidiary
10355 coding system whose eol-type is N.  */)
10356      (coding_system)
10357      Lisp_Object coding_system;
10358 {
10359   Lisp_Object spec, eol_type;
10360   int n;
10361
10362   if (NILP (coding_system))
10363     coding_system = Qno_conversion;
10364   if (! CODING_SYSTEM_P (coding_system))
10365     return Qnil;
10366   spec = CODING_SYSTEM_SPEC (coding_system);
10367   eol_type = AREF (spec, 2);
10368   if (VECTORP (eol_type))
10369     return Fcopy_sequence (eol_type);
10370   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10371   return make_number (n);
10372 }
10373
10374 #endif /* emacs */
10375
10376 \f
/*** 12. Postamble ***/
10378
10379 void
10380 init_coding_once ()
10381 {
10382   int i;
10383
10384   for (i = 0; i < coding_category_max; i++)
10385     {
10386       coding_categories[i].id = -1;
10387       coding_priorities[i] = i;
10388     }
10389
10390   /* ISO2022 specific initialize routine.  */
10391   for (i = 0; i < 0x20; i++)
10392     iso_code_class[i] = ISO_control_0;
10393   for (i = 0x21; i < 0x7F; i++)
10394     iso_code_class[i] = ISO_graphic_plane_0;
10395   for (i = 0x80; i < 0xA0; i++)
10396     iso_code_class[i] = ISO_control_1;
10397   for (i = 0xA1; i < 0xFF; i++)
10398     iso_code_class[i] = ISO_graphic_plane_1;
10399   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10400   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10401   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10402   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10403   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10404   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10405   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10406   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10407   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10408
10409   for (i = 0; i < 256; i++)
10410     {
10411       emacs_mule_bytes[i] = 1;
10412     }
10413   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10414   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10415   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10416   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10417 }
10418
10419 #ifdef emacs
10420
10421 void
10422 syms_of_coding ()
10423 {
10424   staticpro (&Vcoding_system_hash_table);
10425   {
10426     Lisp_Object args[2];
10427     args[0] = QCtest;
10428     args[1] = Qeq;
10429     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10430   }
10431
10432   staticpro (&Vsjis_coding_system);
10433   Vsjis_coding_system = Qnil;
10434
10435   staticpro (&Vbig5_coding_system);
10436   Vbig5_coding_system = Qnil;
10437
10438   staticpro (&Vcode_conversion_reused_workbuf);
10439   Vcode_conversion_reused_workbuf = Qnil;
10440
10441   staticpro (&Vcode_conversion_workbuf_name);
10442   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10443
10444   reused_workbuf_in_use = 0;
10445
10446   DEFSYM (Qcharset, "charset");
10447   DEFSYM (Qtarget_idx, "target-idx");
10448   DEFSYM (Qcoding_system_history, "coding-system-history");
10449   Fset (Qcoding_system_history, Qnil);
10450
10451   /* Target FILENAME is the first argument.  */
10452   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10453   /* Target FILENAME is the third argument.  */
10454   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10455
10456   DEFSYM (Qcall_process, "call-process");
10457   /* Target PROGRAM is the first argument.  */
10458   Fput (Qcall_process, Qtarget_idx, make_number (0));
10459
10460   DEFSYM (Qcall_process_region, "call-process-region");
10461   /* Target PROGRAM is the third argument.  */
10462   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10463
10464   DEFSYM (Qstart_process, "start-process");
10465   /* Target PROGRAM is the third argument.  */
10466   Fput (Qstart_process, Qtarget_idx, make_number (2));
10467
10468   DEFSYM (Qopen_network_stream, "open-network-stream");
10469   /* Target SERVICE is the fourth argument.  */
10470   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10471
10472   DEFSYM (Qcoding_system, "coding-system");
10473   DEFSYM (Qcoding_aliases, "coding-aliases");
10474
10475   DEFSYM (Qeol_type, "eol-type");
10476   DEFSYM (Qunix, "unix");
10477   DEFSYM (Qdos, "dos");
10478
10479   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10480   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10481   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10482   DEFSYM (Qdefault_char, "default-char");
10483   DEFSYM (Qundecided, "undecided");
10484   DEFSYM (Qno_conversion, "no-conversion");
10485   DEFSYM (Qraw_text, "raw-text");
10486
10487   DEFSYM (Qiso_2022, "iso-2022");
10488
10489   DEFSYM (Qutf_8, "utf-8");
10490   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10491
10492   DEFSYM (Qutf_16, "utf-16");
10493   DEFSYM (Qbig, "big");
10494   DEFSYM (Qlittle, "little");
10495
10496   DEFSYM (Qshift_jis, "shift-jis");
10497   DEFSYM (Qbig5, "big5");
10498
10499   DEFSYM (Qcoding_system_p, "coding-system-p");
10500
10501   DEFSYM (Qcoding_system_error, "coding-system-error");
10502   Fput (Qcoding_system_error, Qerror_conditions,
10503         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10504   Fput (Qcoding_system_error, Qerror_message,
10505         make_pure_c_string ("Invalid coding system"));
10506
10507   /* Intern this now in case it isn't already done.
10508      Setting this variable twice is harmless.
10509      But don't staticpro it here--that is done in alloc.c.  */
10510   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10511
10512   DEFSYM (Qtranslation_table, "translation-table");
10513   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10514   DEFSYM (Qtranslation_table_id, "translation-table-id");
10515   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10516   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10517
10518   DEFSYM (Qvalid_codes, "valid-codes");
10519
10520   DEFSYM (Qemacs_mule, "emacs-mule");
10521
10522   DEFSYM (QCcategory, ":category");
10523   DEFSYM (QCmnemonic, ":mnemonic");
10524   DEFSYM (QCdefault_char, ":default-char");
10525   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10526   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10527   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10528   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10529   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10530
10531   Vcoding_category_table
10532     = Fmake_vector (make_number (coding_category_max), Qnil);
10533   staticpro (&Vcoding_category_table);
  /* The following are targets of code detection.  */
10535   ASET (Vcoding_category_table, coding_category_iso_7,
10536         intern_c_string ("coding-category-iso-7"));
10537   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10538         intern_c_string ("coding-category-iso-7-tight"));
10539   ASET (Vcoding_category_table, coding_category_iso_8_1,
10540         intern_c_string ("coding-category-iso-8-1"));
10541   ASET (Vcoding_category_table, coding_category_iso_8_2,
10542         intern_c_string ("coding-category-iso-8-2"));
10543   ASET (Vcoding_category_table, coding_category_iso_7_else,
10544         intern_c_string ("coding-category-iso-7-else"));
10545   ASET (Vcoding_category_table, coding_category_iso_8_else,
10546         intern_c_string ("coding-category-iso-8-else"));
10547   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10548         intern_c_string ("coding-category-utf-8-auto"));
10549   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10550         intern_c_string ("coding-category-utf-8"));
10551   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10552         intern_c_string ("coding-category-utf-8-sig"));
10553   ASET (Vcoding_category_table, coding_category_utf_16_be,
10554         intern_c_string ("coding-category-utf-16-be"));
10555   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10556         intern_c_string ("coding-category-utf-16-auto"));
10557   ASET (Vcoding_category_table, coding_category_utf_16_le,
10558         intern_c_string ("coding-category-utf-16-le"));
10559   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10560         intern_c_string ("coding-category-utf-16-be-nosig"));
10561   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10562         intern_c_string ("coding-category-utf-16-le-nosig"));
10563   ASET (Vcoding_category_table, coding_category_charset,
10564         intern_c_string ("coding-category-charset"));
10565   ASET (Vcoding_category_table, coding_category_sjis,
10566         intern_c_string ("coding-category-sjis"));
10567   ASET (Vcoding_category_table, coding_category_big5,
10568         intern_c_string ("coding-category-big5"));
10569   ASET (Vcoding_category_table, coding_category_ccl,
10570         intern_c_string ("coding-category-ccl"));
10571   ASET (Vcoding_category_table, coding_category_emacs_mule,
10572         intern_c_string ("coding-category-emacs-mule"));
  /* The following are NOT targets of code detection.  */
10574   ASET (Vcoding_category_table, coding_category_raw_text,
10575         intern_c_string ("coding-category-raw-text"));
10576   ASET (Vcoding_category_table, coding_category_undecided,
10577         intern_c_string ("coding-category-undecided"));
10578
10579   DEFSYM (Qinsufficient_source, "insufficient-source");
10580   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10581   DEFSYM (Qinvalid_source, "invalid-source");
10582   DEFSYM (Qinterrupted, "interrupted");
10583   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10584   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10585
10586   defsubr (&Scoding_system_p);
10587   defsubr (&Sread_coding_system);
10588   defsubr (&Sread_non_nil_coding_system);
10589   defsubr (&Scheck_coding_system);
10590   defsubr (&Sdetect_coding_region);
10591   defsubr (&Sdetect_coding_string);
10592   defsubr (&Sfind_coding_systems_region_internal);
10593   defsubr (&Sunencodable_char_position);
10594   defsubr (&Scheck_coding_systems_region);
10595   defsubr (&Sdecode_coding_region);
10596   defsubr (&Sencode_coding_region);
10597   defsubr (&Sdecode_coding_string);
10598   defsubr (&Sencode_coding_string);
10599   defsubr (&Sdecode_sjis_char);
10600   defsubr (&Sencode_sjis_char);
10601   defsubr (&Sdecode_big5_char);
10602   defsubr (&Sencode_big5_char);
10603   defsubr (&Sset_terminal_coding_system_internal);
10604   defsubr (&Sset_safe_terminal_coding_system_internal);
10605   defsubr (&Sterminal_coding_system);
10606   defsubr (&Sset_keyboard_coding_system_internal);
10607   defsubr (&Skeyboard_coding_system);
10608   defsubr (&Sfind_operation_coding_system);
10609   defsubr (&Sset_coding_system_priority);
10610   defsubr (&Sdefine_coding_system_internal);
10611   defsubr (&Sdefine_coding_system_alias);
10612   defsubr (&Scoding_system_put);
10613   defsubr (&Scoding_system_base);
10614   defsubr (&Scoding_system_plist);
10615   defsubr (&Scoding_system_aliases);
10616   defsubr (&Scoding_system_eol_type);
10617   defsubr (&Scoding_system_priority_list);
10618
10619   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10620                doc: /* List of coding systems.
10621
10622 Do not alter the value of this variable manually.  This variable should be
10623 updated by the functions `define-coding-system' and
10624 `define-coding-system-alias'.  */);
10625   Vcoding_system_list = Qnil;
10626
10627   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10628                doc: /* Alist of coding system names.
Each element is a one-element list containing a coding system name.
10630 This variable is given to `completing-read' as COLLECTION argument.
10631
10632 Do not alter the value of this variable manually.  This variable should be
updated by the functions `define-coding-system' and
10634 `define-coding-system-alias'.  */);
10635   Vcoding_system_alist = Qnil;
10636
10637   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10638                doc: /* List of coding-categories (symbols) ordered by priority.
10639
10640 On detecting a coding system, Emacs tries code detection algorithms
10641 associated with each coding-category one by one in this order.  When
10642 one algorithm agrees with a byte sequence of source text, the coding
10643 system bound to the corresponding coding-category is selected.
10644
Don't modify this variable directly, but use `set-coding-system-priority'.  */);
10646   {
10647     int i;
10648
10649     Vcoding_category_list = Qnil;
10650     for (i = coding_category_max - 1; i >= 0; i--)
10651       Vcoding_category_list
10652         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10653                  Vcoding_category_list);
10654   }
10655
10656   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10657                doc: /* Specify the coding system for read operations.
10658 It is useful to bind this variable with `let', but do not set it globally.
10659 If the value is a coding system, it is used for decoding on read operation.
10660 If not, an appropriate element is used from one of the coding system alists.
10661 There are three such tables: `file-coding-system-alist',
10662 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10663   Vcoding_system_for_read = Qnil;
10664
10665   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10666                doc: /* Specify the coding system for write operations.
10667 Programs bind this variable with `let', but you should not set it globally.
10668 If the value is a coding system, it is used for encoding of output,
10669 when writing it to a file and when sending it to a file or subprocess.
10670
10671 If this does not specify a coding system, an appropriate element
10672 is used from one of the coding system alists.
10673 There are three such tables: `file-coding-system-alist',
10674 `process-coding-system-alist', and `network-coding-system-alist'.
10675 For output to files, if the above procedure does not specify a coding system,
10676 the value of `buffer-file-coding-system' is used.  */);
10677   Vcoding_system_for_write = Qnil;
10678
10679   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10680                doc: /*
10681 Coding system used in the latest file or process I/O.  */);
10682   Vlast_coding_system_used = Qnil;
10683
10684   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10685                doc: /*
10686 Error status of the last code conversion.
10687
10688 When an error was detected in the last code conversion, this variable
10689 is set to one of the following symbols.
10690   `insufficient-source'
10691   `inconsistent-eol'
10692   `invalid-source'
10693   `interrupted'
10694   `insufficient-memory'
10695 When no error was detected, the value doesn't change.  So, to check
10696 the error status of a code conversion by this variable, you must
10697 explicitly set this variable to nil before performing code
10698 conversion.  */);
10699   Vlast_code_conversion_error = Qnil;
10700
10701   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10702                doc: /*
10703 *Non-nil means always inhibit code conversion of end-of-line format.
10704 See info node `Coding Systems' and info node `Text and Binary' concerning
10705 such conversion.  */);
10706   inhibit_eol_conversion = 0;
10707
10708   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10709                doc: /*
10710 Non-nil means process buffer inherits coding system of process output.
10711 Bind it to t if the process output is to be treated as if it were a file
10712 read from some filesystem.  */);
10713   inherit_process_coding_system = 0;
10714
10715   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10716                doc: /*
10717 Alist to decide a coding system to use for a file I/O operation.
10718 The format is ((PATTERN . VAL) ...),
10719 where PATTERN is a regular expression matching a file name,
10720 VAL is a coding system, a cons of coding systems, or a function symbol.
10721 If VAL is a coding system, it is used for both decoding and encoding
10722 the file contents.
10723 If VAL is a cons of coding systems, the car part is used for decoding,
10724 and the cdr part is used for encoding.
10725 If VAL is a function symbol, the function must return a coding system
10726 or a cons of coding systems which are used as above.  The function is
10727 called with an argument that is a list of the arguments with which
10728 `find-operation-coding-system' was called.  If the function can't decide
10729 a coding system, it can return `undecided' so that the normal
10730 code-detection is performed.
10731
10732 See also the function `find-operation-coding-system'
10733 and the variable `auto-coding-alist'.  */);
10734   Vfile_coding_system_alist = Qnil;
10735
10736   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10737                doc: /*
10738 Alist to decide a coding system to use for a process I/O operation.
10739 The format is ((PATTERN . VAL) ...),
10740 where PATTERN is a regular expression matching a program name,
10741 VAL is a coding system, a cons of coding systems, or a function symbol.
If VAL is a coding system, it is used for both decoding what is received
from the program and encoding what is sent to the program.
10744 If VAL is a cons of coding systems, the car part is used for decoding,
10745 and the cdr part is used for encoding.
10746 If VAL is a function symbol, the function must return a coding system
10747 or a cons of coding systems which are used as above.
10748
10749 See also the function `find-operation-coding-system'.  */);
10750   Vprocess_coding_system_alist = Qnil;
10751
10752   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10753                doc: /*
10754 Alist to decide a coding system to use for a network I/O operation.
10755 The format is ((PATTERN . VAL) ...),
10756 where PATTERN is a regular expression matching a network service name
10757 or is a port number to connect to,
10758 VAL is a coding system, a cons of coding systems, or a function symbol.
If VAL is a coding system, it is used for both decoding what is received
from the network stream and encoding what is sent to the network stream.
10761 If VAL is a cons of coding systems, the car part is used for decoding,
10762 and the cdr part is used for encoding.
10763 If VAL is a function symbol, the function must return a coding system
10764 or a cons of coding systems which are used as above.
10765
10766 See also the function `find-operation-coding-system'.  */);
10767   Vnetwork_coding_system_alist = Qnil;
10768
10769   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10770                doc: /* Coding system to use with system messages.
10771 Also used for decoding keyboard input on X Window system.  */);
10772   Vlocale_coding_system = Qnil;
10773
10774   /* The eol mnemonics are reset in startup.el system-dependently.  */
10775   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10776                doc: /*
10777 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10778   eol_mnemonic_unix = make_pure_c_string (":");
10779
10780   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10781                doc: /*
10782 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10783   eol_mnemonic_dos = make_pure_c_string ("\\");
10784
10785   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10786                doc: /*
10787 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10788   eol_mnemonic_mac = make_pure_c_string ("/");
10789
10790   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10791                doc: /*
10792 *String displayed in mode line when end-of-line format is not yet determined.  */);
10793   eol_mnemonic_undecided = make_pure_c_string (":");
10794
10795   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10796                doc: /*
10797 *Non-nil enables character translation while encoding and decoding.  */);
10798   Venable_character_translation = Qt;
10799
10800   DEFVAR_LISP ("standard-translation-table-for-decode",
10801                &Vstandard_translation_table_for_decode,
10802                doc: /* Table for translating characters while decoding.  */);
10803   Vstandard_translation_table_for_decode = Qnil;
10804
10805   DEFVAR_LISP ("standard-translation-table-for-encode",
10806                &Vstandard_translation_table_for_encode,
10807                doc: /* Table for translating characters while encoding.  */);
10808   Vstandard_translation_table_for_encode = Qnil;
10809
10810   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10811                doc: /* Alist of charsets vs revision numbers.
10812 While encoding, if a charset (car part of an element) is found,
10813 designate it with the escape sequence identifying revision (cdr part
10814 of the element).  */);
10815   Vcharset_revision_table = Qnil;
10816
10817   DEFVAR_LISP ("default-process-coding-system",
10818                &Vdefault_process_coding_system,
10819                doc: /* Cons of coding systems used for process I/O by default.
10820 The car part is used for decoding a process output,
10821 the cdr part is used for encoding a text to be sent to a process.  */);
10822   Vdefault_process_coding_system = Qnil;
10823
10824   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10825                doc: /*
10826 Table of extra Latin codes in the range 128..159 (inclusive).
10827 This is a vector of length 256.
10828 If Nth element is non-nil, the existence of code N in a file
\(or output of subprocess) doesn't prevent it from being detected as
10830 a coding system of ISO 2022 variant which has a flag
10831 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10832 or reading output of a subprocess.
10833 Only 128th through 159th elements have a meaning.  */);
10834   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10835
10836   DEFVAR_LISP ("select-safe-coding-system-function",
10837                &Vselect_safe_coding_system_function,
10838                doc: /*
10839 Function to call to select safe coding system for encoding a text.
10840
10841 If set, this function is called to force a user to select a proper
10842 coding system which can encode the text in the case that a default
10843 coding system used in each operation can't encode the text.  The
10844 function should take care that the buffer is not modified while
10845 the coding system is being selected.
10846
10847 The default value is `select-safe-coding-system' (which see).  */);
10848   Vselect_safe_coding_system_function = Qnil;
10849
10850   DEFVAR_BOOL ("coding-system-require-warning",
10851                &coding_system_require_warning,
10852                doc: /* Internal use only.
10853 If non-nil, on writing a file, `select-safe-coding-system-function' is
10854 called even if `coding-system-for-write' is non-nil.  The command
10855 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10856   coding_system_require_warning = 0;
10857
10858
10859   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10860                &inhibit_iso_escape_detection,
10861                doc: /*
10862 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10863
10864 When Emacs reads text, it tries to detect how the text is encoded.
10865 This code detection is sensitive to escape sequences.  If Emacs sees
10866 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10867 of the ISO2022 encodings, and decodes text by the corresponding coding
10868 system (e.g. `iso-2022-7bit').
10869
10870 However, there may be a case that you want to read escape sequences in
10871 a file as is.  In such a case, you can set this variable to non-nil.
10872 Then the code detection will ignore any escape sequences, and no text is
10873 detected as encoded in some ISO-2022 encoding.  The result is that all
10874 escape sequences become visible in a buffer.
10875
10876 The default value is nil, and it is strongly recommended not to change
10877 it.  That is because many Emacs Lisp source files that contain
10878 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10879 in Emacs's distribution, and they won't be decoded correctly on
10880 reading if you suppress escape sequence detection.
10881
10882 The other way to read escape sequences in a file without decoding is
10883 to explicitly specify some coding system that doesn't use ISO-2022
escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument].  */);
10885   inhibit_iso_escape_detection = 0;
10886
10887   DEFVAR_BOOL ("inhibit-null-byte-detection",
10888                &inhibit_null_byte_detection,
10889                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10890 By default, Emacs treats it as binary data, and does not attempt to
10891 decode it.  The effect is as if you specified `no-conversion' for
10892 reading that text.
10893
10894 Set this to non-nil when a regular text happens to include null bytes.
10895 Examples are Index nodes of Info files and null-byte delimited output
10896 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10897 decode text as usual.  */);
10898   inhibit_null_byte_detection = 0;
10899
10900   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10901                doc: /* Char table for translating self-inserting characters.
10902 This is applied to the result of input methods, not their input.
10903 See also `keyboard-translate-table'.
10904
10905 Use of this variable for character code unification was rendered
10906 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10907 internal character representation.  */);
10908     Vtranslation_table_for_input = Qnil;
10909
10910   {
10911     Lisp_Object args[coding_arg_max];
10912     Lisp_Object plist[16];
10913     int i;
10914
10915     for (i = 0; i < coding_arg_max; i++)
10916       args[i] = Qnil;
10917
10918     plist[0] = intern_c_string (":name");
10919     plist[1] = args[coding_arg_name] = Qno_conversion;
10920     plist[2] = intern_c_string (":mnemonic");
10921     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10922     plist[4] = intern_c_string (":coding-type");
10923     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10924     plist[6] = intern_c_string (":ascii-compatible-p");
10925     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10926     plist[8] = intern_c_string (":default-char");
10927     plist[9] = args[coding_arg_default_char] = make_number (0);
10928     plist[10] = intern_c_string (":for-unibyte");
10929     plist[11] = args[coding_arg_for_unibyte] = Qt;
10930     plist[12] = intern_c_string (":docstring");
10931     plist[13] = make_pure_c_string ("Do no conversion.\n\
10932 \n\
10933 When you visit a file with this coding, the file is read into a\n\
10934 unibyte buffer as is, thus each byte of a file is treated as a\n\
10935 character.");
10936     plist[14] = intern_c_string (":eol-type");
10937     plist[15] = args[coding_arg_eol_type] = Qunix;
10938     args[coding_arg_plist] = Flist (16, plist);
10939     Fdefine_coding_system_internal (coding_arg_max, args);
10940
10941     plist[1] = args[coding_arg_name] = Qundecided;
10942     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10943     plist[5] = args[coding_arg_coding_type] = Qundecided;
10944     /* This is already set.
10945        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10946     plist[8] = intern_c_string (":charset-list");
10947     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10948     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10949     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10950     plist[15] = args[coding_arg_eol_type] = Qnil;
10951     args[coding_arg_plist] = Flist (16, plist);
10952     Fdefine_coding_system_internal (coding_arg_max, args);
10953   }
10954
10955   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10956
10957   {
10958     int i;
10959
10960     for (i = 0; i < coding_category_max; i++)
10961       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10962   }
10963 #if defined (MSDOS) || defined (WINDOWSNT)
10964   system_eol_type = Qdos;
10965 #else
10966   system_eol_type = Qunix;
10967 #endif
10968   staticpro (&system_eol_type);
10969 }
10970
10971 char *
10972 emacs_strerror (error_number)
10973      int error_number;
10974 {
10975   char *str;
10976
10977   synchronize_system_messages_locale ();
10978   str = strerror (error_number);
10979
10980   if (! NILP (Vlocale_coding_system))
10981     {
10982       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10983                                                       Vlocale_coding_system,
10984                                                       0);
10985       str = (char *) SDATA (dec);
10986     }
10987
10988   return str;
10989 }
10990
10991 #endif /* emacs */
10992
10993 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10994    (do not change this comment) */