src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
   59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  At
   60   the C level, a coding system is represented by a vector of attributes
   61   stored in the hash table Vcoding_system_hash_table.  The conversion
   62   from coding system symbol to attributes vector is done by looking up
   63   Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we setup a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;
 166   ...;
 167
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the souce is exausted, jump
 171          to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (! __C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source exausted successfully.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size;
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source code, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf < charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that source area and destination area are
 258   overlapped, which means that we can produce a encoded text until it
 259   reaches at the head of not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292
 293 #include "lisp.h"
 294 #include "buffer.h"
 295 #include "character.h"
 296 #include "charset.h"
 297 #include "ccl.h"
 298 #include "composite.h"
 299 #include "coding.h"
 300 #include "window.h"
 301 #include "frame.h"
 302 #include "termhooks.h"
 303
 304 Lisp_Object Vcoding_system_hash_table;
 305
 306 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 307 Lisp_Object Qunix, Qdos;
 308 extern Lisp_Object Qmac;        /* frame.c */
 309 Lisp_Object Qbuffer_file_coding_system;
 310 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 311 Lisp_Object Qdefault_char;
 312 Lisp_Object Qno_conversion, Qundecided;
 313 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 314 Lisp_Object Qbig, Qlittle;
 315 Lisp_Object Qcoding_system_history;
 316 Lisp_Object Qvalid_codes;
 317 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 318 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 319 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 320 Lisp_Object QCascii_compatible_p;
 321
 322 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 323 Lisp_Object Qcall_process, Qcall_process_region;
 324 Lisp_Object Qstart_process, Qopen_network_stream;
 325 Lisp_Object Qtarget_idx;
 326
 327 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 328 Lisp_Object Qinterrupted, Qinsufficient_memory;
 329
 330 extern Lisp_Object Qcompletion_ignore_case;
 331
 332 /* If a symbol has this property, evaluate the value to define the
 333    symbol as a coding system.  */
 334 static Lisp_Object Qcoding_system_define_form;
 335
 336 int coding_system_require_warning;
 337
 338 Lisp_Object Vselect_safe_coding_system_function;
 339
 340 /* Mnemonic string for each format of end-of-line.  */
 341 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 342 /* Mnemonic string to indicate format of end-of-line is not yet
 343    decided.  */
 344 Lisp_Object eol_mnemonic_undecided;
 345
 346 /* Format of end-of-line decided by system.  This is Qunix on
 347    Unix and Mac, Qdos on DOS/Windows.
 348    This has an effect only for external encoding (i.e. for output to
 349    file and process), not for in-buffer or Lisp string encoding.  */
 350 static Lisp_Object system_eol_type;
 351
 352 #ifdef emacs
 353
 354 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 355
 356 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 357
 358 /* Coding system emacs-mule and raw-text are for converting only
 359    end-of-line format.  */
 360 Lisp_Object Qemacs_mule, Qraw_text;
 361 Lisp_Object Qutf_8_emacs;
 362
 363 /* Coding-systems are handed between Emacs Lisp programs and C internal
 364    routines by the following three variables.  */
 365 /* Coding-system for reading files and receiving data from process.  */
 366 Lisp_Object Vcoding_system_for_read;
 367 /* Coding-system for writing files and sending data to process.  */
 368 Lisp_Object Vcoding_system_for_write;
 369 /* Coding-system actually used in the latest I/O.  */
 370 Lisp_Object Vlast_coding_system_used;
 371 /* Set to non-nil when an error is detected while code conversion.  */
 372 Lisp_Object Vlast_code_conversion_error;
 373 /* A vector of length 256 which contains information about special
 374    Latin codes (especially for dealing with Microsoft codes).  */
 375 Lisp_Object Vlatin_extra_code_table;
 376
 377 /* Flag to inhibit code conversion of end-of-line format.  */
 378 int inhibit_eol_conversion;
 379
 380 /* Flag to inhibit ISO2022 escape sequence detection.  */
 381 int inhibit_iso_escape_detection;
 382
 383 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 384 int inherit_process_coding_system;
 385
 386 /* Coding system to be used to encode text for terminal display when
 387    terminal coding system is nil.  */
 388 struct coding_system safe_terminal_coding;
 389
 390 Lisp_Object Vfile_coding_system_alist;
 391 Lisp_Object Vprocess_coding_system_alist;
 392 Lisp_Object Vnetwork_coding_system_alist;
 393
 394 Lisp_Object Vlocale_coding_system;
 395
 396 #endif /* emacs */
 397
 398 /* Flag to tell if we look up translation table on character code
 399    conversion.  */
 400 Lisp_Object Venable_character_translation;
 401 /* Standard translation table to look up on decoding (reading).  */
 402 Lisp_Object Vstandard_translation_table_for_decode;
 403 /* Standard translation table to look up on encoding (writing).  */
 404 Lisp_Object Vstandard_translation_table_for_encode;
 405
 406 Lisp_Object Qtranslation_table;
 407 Lisp_Object Qtranslation_table_id;
 408 Lisp_Object Qtranslation_table_for_decode;
 409 Lisp_Object Qtranslation_table_for_encode;
 410
 411 /* Alist of charsets vs revision number.  */
 412 static Lisp_Object Vcharset_revision_table;
 413
 414 /* Default coding systems used for process I/O.  */
 415 Lisp_Object Vdefault_process_coding_system;
 416
 417 /* Char table for translating Quail and self-inserting input.  */
 418 Lisp_Object Vtranslation_table_for_input;
 419
 420 /* Two special coding systems.  */
 421 Lisp_Object Vsjis_coding_system;
 422 Lisp_Object Vbig5_coding_system;
 423
 424 /* ISO2022 section */
 425
 426 #define CODING_ISO_INITIAL(coding, reg)                 \
 427   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 428                      coding_attr_iso_initial),          \
 429                reg)))
 430
 431
 432 #define CODING_ISO_REQUEST(coding, charset_id)  \
 433   ((charset_id <= (coding)->max_charset_id      \
 434     ? (coding)->safe_charsets[charset_id]       \
 435     : -1))
 436
 437
 438 #define CODING_ISO_FLAGS(coding)        \
 439   ((coding)->spec.iso_2022.flags)
 440 #define CODING_ISO_DESIGNATION(coding, reg)     \
 441   ((coding)->spec.iso_2022.current_designation[reg])
 442 #define CODING_ISO_INVOCATION(coding, plane)    \
 443   ((coding)->spec.iso_2022.current_invocation[plane])
 444 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 445   ((coding)->spec.iso_2022.single_shifting)
 446 #define CODING_ISO_BOL(coding)  \
 447   ((coding)->spec.iso_2022.bol)
 448 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 449   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 450
 451 /* Control characters of ISO2022.  */
 452                         /* code */      /* function */
 453 #define ISO_CODE_LF     0x0A            /* line-feed */
 454 #define ISO_CODE_CR     0x0D            /* carriage-return */
 455 #define ISO_CODE_SO     0x0E            /* shift-out */
 456 #define ISO_CODE_SI     0x0F            /* shift-in */
 457 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 458 #define ISO_CODE_ESC    0x1B            /* escape */
 459 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 460 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 461 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 462
 463 /* All code (1-byte) of ISO2022 is classified into one of the
 464    followings.  */
  465 enum iso_code_class_type
  466   {
  467     ISO_control_0,              /* Control codes in the range
  468                                    0x00..0x1F and 0x7F, except for the
  469                                    following 4 codes.  */
  470     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
  471     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
  472     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
  473     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
  474     ISO_control_1,              /* Control codes in the range
  475                                    0x80..0x9F, except for the
  476                                    following 3 codes.  */
  477     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
  478     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
  479     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
  480     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
  481     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
  482     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
  483     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
  484   };
 485
 486 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 487     `iso-flags' attribute of an iso2022 coding system.  */
 488
 489 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 490    instead of the correct short-form sequence (e.g. ESC $ A).  */
 491 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 492
 493 /* If set, reset graphic planes and registers at end-of-line to the
 494    initial state.  */
 495 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 496
 497 /* If set, reset graphic planes and registers before any control
 498    characters to the initial state.  */
 499 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 500
 501 /* If set, encode by 7-bit environment.  */
 502 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 503
 504 /* If set, use locking-shift function.  */
 505 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 506
 507 /* If set, use single-shift function.  Overwrite
 508    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 509 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 510
 511 /* If set, use designation escape sequence.  */
 512 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 513
 514 /* If set, produce revision number sequence.  */
 515 #define CODING_ISO_FLAG_REVISION        0x0080
 516
 517 /* If set, produce ISO6429's direction specifying sequence.  */
 518 #define CODING_ISO_FLAG_DIRECTION       0x0100
 519
 520 /* If set, assume designation states are reset at beginning of line on
 521    output.  */
 522 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 523
 524 /* If set, designation sequence should be placed at beginning of line
 525    on output.  */
 526 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 527
  528 /* If set, do not encode unsafe characters on output.  */
 529 #define CODING_ISO_FLAG_SAFE            0x0800
 530
 531 /* If set, extra latin codes (128..159) are accepted as a valid code
 532    on input.  */
 533 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 534
 535 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 536
 537 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 538
 539 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 540
 541 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 542
 543 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 544
 545 /* A character to be produced on output if encoding of the original
 546    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 547 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 548
 549 /* UTF-8 section */
 550 #define CODING_UTF_8_BOM(coding)        \
 551   ((coding)->spec.utf_8_bom)
 552
 553 /* UTF-16 section */
 554 #define CODING_UTF_16_BOM(coding)       \
 555   ((coding)->spec.utf_16.bom)
 556
 557 #define CODING_UTF_16_ENDIAN(coding)    \
 558   ((coding)->spec.utf_16.endian)
 559
 560 #define CODING_UTF_16_SURROGATE(coding) \
 561   ((coding)->spec.utf_16.surrogate)
 562
 563
 564 /* CCL section */
 565 #define CODING_CCL_DECODER(coding)      \
 566   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 567 #define CODING_CCL_ENCODER(coding)      \
 568   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 569 #define CODING_CCL_VALIDS(coding)                                          \
 570   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 571
  572 /* Index for each coding category in `coding_categories'.  The
         values up to coding_category_raw_text are also used as bit
         positions by the CATEGORY_MASK_XXX macros, which are defined
         as (1 << coding_category_xxx).  coding_category_max is not a
         category; it is the number of categories and sizes the
         arrays `coding_priorities' and `coding_categories'.  */
  573
  574 enum coding_category
  575   {
  576     coding_category_iso_7,
  577     coding_category_iso_7_tight,
  578     coding_category_iso_8_1,
  579     coding_category_iso_8_2,
  580     coding_category_iso_7_else,
  581     coding_category_iso_8_else,
  582     coding_category_utf_8_auto,
  583     coding_category_utf_8_nosig,
  584     coding_category_utf_8_sig,
  585     coding_category_utf_16_auto,
  586     coding_category_utf_16_be,
  587     coding_category_utf_16_le,
  588     coding_category_utf_16_be_nosig,
  589     coding_category_utf_16_le_nosig,
  590     coding_category_charset,
  591     coding_category_sjis,
  592     coding_category_big5,
  593     coding_category_ccl,
  594     coding_category_emacs_mule,
  595     /* All above are targets of code detection.  */
  596     coding_category_raw_text,
  597     coding_category_undecided,
  598     coding_category_max
  599   };
 600
 601 /* Definitions of flag bits used in detect_coding_XXXX.  */
 602 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 603 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 604 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 605 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 606 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 607 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 608 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 609 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 610 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 611 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 612 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 613 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 614 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 615 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 616 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 617 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 618 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 619 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 620 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 621 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 622
  623 /* This value is returned if detect_coding_mask () finds nothing other
  624    than ASCII characters.  */
 625 #define CATEGORY_MASK_ANY               \
 626   (CATEGORY_MASK_ISO_7                  \
 627    | CATEGORY_MASK_ISO_7_TIGHT          \
 628    | CATEGORY_MASK_ISO_8_1              \
 629    | CATEGORY_MASK_ISO_8_2              \
 630    | CATEGORY_MASK_ISO_7_ELSE           \
 631    | CATEGORY_MASK_ISO_8_ELSE           \
 632    | CATEGORY_MASK_UTF_8_AUTO           \
 633    | CATEGORY_MASK_UTF_8_NOSIG          \
 634    | CATEGORY_MASK_UTF_8_SIG            \
 635    | CATEGORY_MASK_UTF_16_AUTO          \
 636    | CATEGORY_MASK_UTF_16_BE            \
 637    | CATEGORY_MASK_UTF_16_LE            \
 638    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 639    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 640    | CATEGORY_MASK_CHARSET              \
 641    | CATEGORY_MASK_SJIS                 \
 642    | CATEGORY_MASK_BIG5                 \
 643    | CATEGORY_MASK_CCL                  \
 644    | CATEGORY_MASK_EMACS_MULE)
 645
 646
 647 #define CATEGORY_MASK_ISO_7BIT \
 648   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 649
 650 #define CATEGORY_MASK_ISO_8BIT \
 651   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 652
 653 #define CATEGORY_MASK_ISO_ELSE \
 654   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 655
 656 #define CATEGORY_MASK_ISO_ESCAPE        \
 657   (CATEGORY_MASK_ISO_7                  \
 658    | CATEGORY_MASK_ISO_7_TIGHT          \
 659    | CATEGORY_MASK_ISO_7_ELSE           \
 660    | CATEGORY_MASK_ISO_8_ELSE)
 661
 662 #define CATEGORY_MASK_ISO       \
 663   (  CATEGORY_MASK_ISO_7BIT     \
 664      | CATEGORY_MASK_ISO_8BIT   \
 665      | CATEGORY_MASK_ISO_ELSE)
 666
 667 #define CATEGORY_MASK_UTF_16            \
 668   (CATEGORY_MASK_UTF_16_AUTO            \
 669    | CATEGORY_MASK_UTF_16_BE            \
 670    | CATEGORY_MASK_UTF_16_LE            \
 671    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 672    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 673
 674 #define CATEGORY_MASK_UTF_8     \
 675   (CATEGORY_MASK_UTF_8_AUTO     \
 676    | CATEGORY_MASK_UTF_8_NOSIG  \
 677    | CATEGORY_MASK_UTF_8_SIG)
 678
 679 /* List of symbols `coding-category-xxx' ordered by priority.  This
 680    variable is exposed to Emacs Lisp.  */
 681 static Lisp_Object Vcoding_category_list;
 682
  683 /* Table of coding categories (Lisp symbols).  This variable is for
  684    internal use only.  */
 685 static Lisp_Object Vcoding_category_table;
 686
 687 /* Table of coding-categories ordered by priority.  */
 688 static enum coding_category coding_priorities[coding_category_max];
 689
 690 /* Nth element is a coding context for the coding system bound to the
 691    Nth coding category.  */
 692 static struct coding_system coding_categories[coding_category_max];
 693
 694 /*** Commonly used macros and functions ***/
 695
 696 #ifndef min
 697 #define min(a, b) ((a) < (b) ? (a) : (b))
 698 #endif
 699 #ifndef max
 700 #define max(a, b) ((a) > (b) ? (a) : (b))
 701 #endif
  702 /* Set ATTRS to the value of CODING_ID_ATTRS for CODING's id, and
         CHARSET_LIST to the value of CODING_ATTR_CHARSET_LIST for
         those attributes.  ATTRS and CHARSET_LIST are assigned to, so
         both must be lvalues.  */
  703 #define CODING_GET_INFO(coding, attrs, charset_list)    \
  704   do {                                                  \
  705     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
  706     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
  707   } while (0)
 708
 709
  710 /* Safely get one byte from the source text pointed by SRC which ends
  711    at SRC_END, and set C to that byte.  If there are not enough bytes
  712    in the source, it jumps to `no_more_source'.  If multibytep is
  713    nonzero, and a multibyte character is found at SRC, set C to the
  714    negative value of the character code.  The caller should declare
  715    and set these variables appropriately in advance:
  716         src, src_end, src_base, multibytep, coding, consumed_chars */
  717
  718 #define ONE_MORE_BYTE(c)                                \
  719   do {                                                  \
  720     if (src == src_end)                                 \
  721       {                                                 \
  722         if (src_base < src) /* bytes already read */    \
  723           record_conversion_result                      \
  724             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
  725         goto no_more_source;                            \
  726       }                                                 \
  727     c = *src++;                                         \
  728     if (multibytep && (c & 0x80))                       \
  729       {                                                 \
  730         if ((c & 0xFE) == 0xC0) /* C is 0xC0 or 0xC1 */ \
  731           c = ((c & 1) << 6) | *src++;                  \
  732         else                                            \
  733           {                                             \
  734             src--;                                      \
  735             c = - string_char (src, &src, NULL);        \
  736             record_conversion_result                    \
  737               (coding, CODING_RESULT_INVALID_SRC);      \
  738           }                                             \
  739       }                                                 \
  740     consumed_chars++;                                   \
  741   } while (0)
 742
  743 /* Like ONE_MORE_BYTE, but without the comparison of SRC against
         SRC_END: this macro never jumps to `no_more_source' and never
         records CODING_RESULT_INSUFFICIENT_SRC.  Because nothing here
         checks for the end of the source, the caller has to make sure
         that the bytes read are available at SRC.  The caller should
         declare and set these variables appropriately in advance:
              src, multibytep, coding, consumed_chars */
  744 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
  745   do {                                                  \
  746     c = *src++;                                         \
  747     if (multibytep && (c & 0x80))                       \
  748       {                                                 \
  749         if ((c & 0xFE) == 0xC0)                         \
  750           c = ((c & 1) << 6) | *src++;                  \
  751         else                                            \
  752           {                                             \
  753             src--;                                      \
  754             c = - string_char (src, &src, NULL);        \
  755             record_conversion_result                    \
  756               (coding, CODING_RESULT_INVALID_SRC);      \
  757           }                                             \
  758       }                                                 \
  759     consumed_chars++;                                   \
  760   } while (0)
 761
 762
 763 /* Store a byte C in the place pointed by DST and increment DST to the
 764    next free point, and increment PRODUCED_CHARS.  The caller should
 765    assure that C is 0..127, and declare and set the variable `dst'
 766    appropriately in advance.
 767 */
 768
 769
 770 #define EMIT_ONE_ASCII_BYTE(c)  \
 771   do {                          \
 772     produced_chars++;           \
 773     *dst++ = (c);               \
 774   } while (0)
 775
 776
 777 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 778
 779 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 780   do {                                  \
 781     produced_chars += 2;                \
 782     *dst++ = (c1), *dst++ = (c2);       \
 783   } while (0)
 784
 785
 786 /* Store a byte C in the place pointed by DST and increment DST to the
 787    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 788    nonzero, store in an appropriate multibyte from.  The caller should
 789    declare and set the variables `dst' and `multibytep' appropriately
 790    in advance.  */
 791
 792 #define EMIT_ONE_BYTE(c)                \
 793   do {                                  \
 794     produced_chars++;                   \
 795     if (multibytep)                     \
 796       {                                 \
 797         int ch = (c);                   \
 798         if (ch >= 0x80)                 \
 799           ch = BYTE8_TO_CHAR (ch);      \
 800         CHAR_STRING_ADVANCE (ch, dst);  \
 801       }                                 \
 802     else                                \
 803       *dst++ = (c);                     \
 804   } while (0)
 805
 806
 807 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 808
 809 #define EMIT_TWO_BYTES(c1, c2)          \
 810   do {                                  \
 811     produced_chars += 2;                \
 812     if (multibytep)                     \
 813       {                                 \
 814         int ch;                         \
 815                                         \
 816         ch = (c1);                      \
 817         if (ch >= 0x80)                 \
 818           ch = BYTE8_TO_CHAR (ch);      \
 819         CHAR_STRING_ADVANCE (ch, dst);  \
 820         ch = (c2);                      \
 821         if (ch >= 0x80)                 \
 822           ch = BYTE8_TO_CHAR (ch);      \
 823         CHAR_STRING_ADVANCE (ch, dst);  \
 824       }                                 \
 825     else                                \
 826       {                                 \
 827         *dst++ = (c1);                  \
 828         *dst++ = (c2);                  \
 829       }                                 \
 830   } while (0)
 831
 832
 833 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 834   do {                                  \
 835     EMIT_ONE_BYTE (c1);                 \
 836     EMIT_TWO_BYTES (c2, c3);            \
 837   } while (0)
 838
 839
 840 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 841   do {                                          \
 842     EMIT_TWO_BYTES (c1, c2);                    \
 843     EMIT_TWO_BYTES (c3, c4);                    \
 844   } while (0)
 845
 846
 847 /* Prototypes for static functions.  */
 848 static void record_conversion_result P_ ((struct coding_system *coding,
 849                                           enum coding_result_code result));
 850 static int detect_coding_utf_8 P_ ((struct coding_system *,
 851                                     struct coding_detection_info *info));
 852 static void decode_coding_utf_8 P_ ((struct coding_system *));
 853 static int encode_coding_utf_8 P_ ((struct coding_system *));
 854
 855 static int detect_coding_utf_16 P_ ((struct coding_system *,
 856                                      struct coding_detection_info *info));
 857 static void decode_coding_utf_16 P_ ((struct coding_system *));
 858 static int encode_coding_utf_16 P_ ((struct coding_system *));
 859
 860 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 861                                        struct coding_detection_info *info));
 862 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 863 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 864
 865 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 866                                          struct coding_detection_info *info));
 867 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 868 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 869
 870 static int detect_coding_sjis P_ ((struct coding_system *,
 871                                    struct coding_detection_info *info));
 872 static void decode_coding_sjis P_ ((struct coding_system *));
 873 static int encode_coding_sjis P_ ((struct coding_system *));
 874
 875 static int detect_coding_big5 P_ ((struct coding_system *,
 876                                    struct coding_detection_info *info));
 877 static void decode_coding_big5 P_ ((struct coding_system *));
 878 static int encode_coding_big5 P_ ((struct coding_system *));
 879
 880 static int detect_coding_ccl P_ ((struct coding_system *,
 881                                   struct coding_detection_info *info));
 882 static void decode_coding_ccl P_ ((struct coding_system *));
 883 static int encode_coding_ccl P_ ((struct coding_system *));
 884
 885 static void decode_coding_raw_text P_ ((struct coding_system *));
 886 static int encode_coding_raw_text P_ ((struct coding_system *));
 887
 888 static void coding_set_source P_ ((struct coding_system *));
 889 static void coding_set_destination P_ ((struct coding_system *));
 890 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 891 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 892                                             EMACS_INT, EMACS_INT));
 893 static unsigned char *alloc_destination P_ ((struct coding_system *,
 894                                              EMACS_INT, unsigned char *));
 895 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 896 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 897                                                      int *, int *,
 898                                                      unsigned char *));
 899 static int detect_eol P_ ((const unsigned char *,
 900                            EMACS_INT, enum coding_category));
 901 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 902 static void decode_eol P_ ((struct coding_system *));
 903 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 904 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
 905                                         int, int *, int *));
 906 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 907 static INLINE void produce_composition P_ ((struct coding_system *, int *,
 908                                             EMACS_INT));
 909 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 910                                         EMACS_INT));
 911 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 912 static int decode_coding P_ ((struct coding_system *));
 913 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 914                                                       struct coding_system *,
 915                                                       int *, EMACS_INT *));
 916 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 917                                                   struct coding_system *,
 918                                                   int *, EMACS_INT *));
 919 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 920 static int encode_coding P_ ((struct coding_system *));
 921 static Lisp_Object make_conversion_work_buffer P_ ((int));
 922 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 923 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 924 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 925
 926 static void
 927 record_conversion_result (struct coding_system *coding,
 928                           enum coding_result_code result)
 929 {
 930   coding->result = result;
 931   switch (result)
 932     {
 933     case CODING_RESULT_INSUFFICIENT_SRC:
 934       Vlast_code_conversion_error = Qinsufficient_source;
 935       break;
 936     case CODING_RESULT_INCONSISTENT_EOL:
 937       Vlast_code_conversion_error = Qinconsistent_eol;
 938       break;
 939     case CODING_RESULT_INVALID_SRC:
 940       Vlast_code_conversion_error = Qinvalid_source;
 941       break;
 942     case CODING_RESULT_INTERRUPT:
 943       Vlast_code_conversion_error = Qinterrupted;
 944       break;
 945     case CODING_RESULT_INSUFFICIENT_MEM:
 946       Vlast_code_conversion_error = Qinsufficient_memory;
 947       break;
 948     default:
 949       Vlast_code_conversion_error = intern ("Unknown error");
 950     }
 951 }
 952
 953 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 954   do {                                                                       \
 955     charset_map_loaded = 0;                                                  \
 956     c = DECODE_CHAR (charset, code);                                         \
 957     if (charset_map_loaded)                                                  \
 958       {                                                                      \
 959         const unsigned char *orig = coding->source;                          \
 960         EMACS_INT offset;                                                    \
 961                                                                              \
 962         coding_set_source (coding);                                          \
 963         offset = coding->source - orig;                                      \
 964         src += offset;                                                       \
 965         src_base += offset;                                                  \
 966         src_end += offset;                                                   \
 967       }                                                                      \
 968   } while (0)
 969
 970
 971 /* If there are at least BYTES length of room at dst, allocate memory
 972    for coding->destination and update dst and dst_end.  We don't have
 973    to take care of coding->source which will be relocated.  It is
 974    handled by calling coding_set_source in encode_coding.  */
 975
 976 #define ASSURE_DESTINATION(bytes)                               \
 977   do {                                                          \
 978     if (dst + (bytes) >= dst_end)                               \
 979       {                                                         \
 980         int more_bytes = charbuf_end - charbuf + (bytes);       \
 981                                                                 \
 982         dst = alloc_destination (coding, more_bytes, dst);      \
 983         dst_end = coding->destination + coding->dst_bytes;      \
 984       }                                                         \
 985   } while (0)
 986
 987
 988 /* Store multibyte form of the character C in P, and advance P to the
 989    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
 990    never calls MAYBE_UNIFY_CHAR.  */
 991
 992 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
 993   do {                                          \
 994     if ((c) <= MAX_1_BYTE_CHAR)                 \
 995       *(p)++ = (c);                             \
 996     else if ((c) <= MAX_2_BYTE_CHAR)            \
 997       *(p)++ = (0xC0 | ((c) >> 6)),             \
 998         *(p)++ = (0x80 | ((c) & 0x3F));         \
 999     else if ((c) <= MAX_3_BYTE_CHAR)            \
1000       *(p)++ = (0xE0 | ((c) >> 12)),            \
1001         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1002         *(p)++ = (0x80 | ((c) & 0x3F));         \
1003     else if ((c) <= MAX_4_BYTE_CHAR)            \
1004       *(p)++ = (0xF0 | (c >> 18)),              \
1005         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1006         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1007         *(p)++ = (0x80 | (c & 0x3F));           \
1008     else if ((c) <= MAX_5_BYTE_CHAR)            \
1009       *(p)++ = 0xF8,                            \
1010         *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
1011         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1012         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1013         *(p)++ = (0x80 | (c & 0x3F));           \
1014     else                                        \
1015       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1016   } while (0)
1017
1018
/* Yield the character code whose multibyte form starts at P and move
   P past that form.  Same job as STRING_CHAR_ADVANCE, except that
   MAYBE_UNIFY_CHAR is never called.

   The length (1 to 5 bytes) is chosen from the high bits of the
   first byte.  A two-byte form whose lead byte is below 0xC2 gets
   0x3FFF80 ORed in.  The lead byte of a five-byte form carries no
   payload bits.  P is evaluated several times.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                                 \
  (((p)[0] & 0x80) == 0                                                 \
   ? *(p)++                                                             \
   : ((p)[0] & 0x20) == 0                                               \
   ? ((p) += 2,                                                         \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)                \
       | (((p)[-2] & 0x1F) << 6)                                        \
       | ((p)[-1] & 0x3F)))                                             \
   : ((p)[0] & 0x10) == 0                                               \
   ? ((p) += 3,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x0F) << 12)))                                     \
   : ((p)[0] & 0x08) == 0                                               \
   ? ((p) += 4,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0x0F) << 18)))                                     \
   : ((p) += 5,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0x3F) << 18))))
1047
1048
1049 static void
1050 coding_set_source (coding)
1051      struct coding_system *coding;
1052 {
1053   if (BUFFERP (coding->src_object))
1054     {
1055       struct buffer *buf = XBUFFER (coding->src_object);
1056
1057       if (coding->src_pos < 0)
1058         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1059       else
1060         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1061     }
1062   else if (STRINGP (coding->src_object))
1063     {
1064       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1065     }
1066   else
1067     /* Otherwise, the source is C string and is never relocated
1068        automatically.  Thus we don't have to update anything.  */
1069     ;
1070 }
1071
1072 static void
1073 coding_set_destination (coding)
1074      struct coding_system *coding;
1075 {
1076   if (BUFFERP (coding->dst_object))
1077     {
1078       if (coding->src_pos < 0)
1079         {
1080           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1081           coding->dst_bytes = (GAP_END_ADDR
1082                                - (coding->src_bytes - coding->consumed)
1083                                - coding->destination);
1084         }
1085       else
1086         {
1087           /* We are sure that coding->dst_pos_byte is before the gap
1088              of the buffer. */
1089           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1090                                  + coding->dst_pos_byte - BEG_BYTE);
1091           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1092                                - coding->destination);
1093         }
1094     }
1095   else
1096     /* Otherwise, the destination is C string and is never relocated
1097        automatically.  Thus we don't have to update anything.  */
1098     ;
1099 }
1100
1101
1102 static void
1103 coding_alloc_by_realloc (coding, bytes)
1104      struct coding_system *coding;
1105      EMACS_INT bytes;
1106 {
1107   coding->destination = (unsigned char *) xrealloc (coding->destination,
1108                                                     coding->dst_bytes + bytes);
1109   coding->dst_bytes += bytes;
1110 }
1111
1112 static void
1113 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1114      struct coding_system *coding;
1115      EMACS_INT gap_head_used, bytes;
1116 {
1117   if (EQ (coding->src_object, coding->dst_object))
1118     {
1119       /* The gap may contain the produced data at the head and not-yet
1120          consumed data at the tail.  To preserve those data, we at
1121          first make the gap size to zero, then increase the gap
1122          size.  */
1123       EMACS_INT add = GAP_SIZE;
1124
1125       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1126       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1127       make_gap (bytes);
1128       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1129       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1130     }
1131   else
1132     {
1133       Lisp_Object this_buffer;
1134
1135       this_buffer = Fcurrent_buffer ();
1136       set_buffer_internal (XBUFFER (coding->dst_object));
1137       make_gap (bytes);
1138       set_buffer_internal (XBUFFER (this_buffer));
1139     }
1140 }
1141
1142
1143 static unsigned char *
1144 alloc_destination (coding, nbytes, dst)
1145      struct coding_system *coding;
1146      EMACS_INT nbytes;
1147      unsigned char *dst;
1148 {
1149   EMACS_INT offset = dst - coding->destination;
1150
1151   if (BUFFERP (coding->dst_object))
1152     {
1153       struct buffer *buf = XBUFFER (coding->dst_object);
1154
1155       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1156     }
1157   else
1158     coding_alloc_by_realloc (coding, nbytes);
1159   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1160   coding_set_destination (coding);
1161   dst = coding->destination + offset;
1162   return dst;
1163 }
1164
1165 /** Macros for annotations.  */
1166
/* Maximum length of annotation data (sum of annotations for
   composition and charset).
   NOTE(review): presumably 4 fixed elements plus up to
   MAX_COMPOSITION_COMPONENTS * 2 - 1 component/rule elements for a
   composition, plus 4 elements for a charset annotation -- confirm
   against the producers of these annotations.  */
#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1170
/* Annotation data is stored in the array coding->charbuf in this
   format:
     [ -LENGTH ANNOTATION_MASK NCHARS ... ]
   LENGTH is the number of elements in the annotation.
   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
   NCHARS is the number of characters in the text annotated.

   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... METHOD [ COMPOSITION-COMPONENTS ... ]
   METHOD is one of enum composition_method.
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.

   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
   follows.  */
1189
/* Append the three-element annotation header [ -LEN MASK NCHARS ] at
   BUF, advance BUF past it, and mark the coding system in scope
   (`coding') as annotated.

   The expansion deliberately does NOT end in a semicolon: the caller
   supplies it.  With a semicolon baked in, `if (x) ADD_ANNOTATION_DATA
   (...); else ...' would not compile.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1197
/* Append a composition annotation at BUF: the common header with
   length 4 and CODING_ANNOTATE_COMPOSITION_MASK, followed by METHOD.
   BUF is advanced past the four elements.  BUF and METHOD are
   parenthesized so that expression arguments (e.g. `*pp' for BUF)
   advance and store through the intended object.  */

#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)
1203
1204
/* Append a charset annotation at BUF: the common header with length
   4 and CODING_ANNOTATE_CHARSET_MASK, followed by the charset ID.
   BUF is advanced past the four elements.  BUF and ID are
   parenthesized so that expression arguments (e.g. `*pp' for BUF)
   advance and store through the intended object.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1210
1211 \f
1212 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1213
1214
1215
1216 \f
1217 /*** 3. UTF-8 ***/
1218
1219 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1220    Check if a text is encoded in UTF-8.  If it is, return 1, else
1221    return 0.  */
1222
/* Nonzero if C is below 0x80, i.e. a complete one-octet code.  Note
   that a negative C also satisfies this test.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* Nonzero if C has the bit pattern 10xxxxxx of a trailing octet.  */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
/* Nonzero if C has the bit pattern 110xxxxx (leads a 2-octet form).  */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* Nonzero if C has the bit pattern 1110xxxx (leads a 3-octet form).  */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* Nonzero if C has the bit pattern 11110xxx (leads a 4-octet form).  */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* Nonzero if C has the bit pattern 111110xx (leads a 5-octet form).  */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)

/* Character code of the byte order mark, and the three octets that
   encode it in UTF-8.  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1234
/* Check whether the source text of CODING can be UTF-8 and record the
   verdict in DETECT_INFO.

   CATEGORY_MASK_UTF_8 is always added to DETECT_INFO->checked.  The
   scan starts after the first CODING->head_ascii bytes and accepts
   only lead octets followed by the right number of trailing octets
   (forms of 2 to 5 octets).  The first malformed sequence adds
   CATEGORY_MASK_UTF_8 to DETECT_INFO->rejected and returns 0.

   When the source runs out, a sequence cut short in the last block
   is also rejected (return 0).  Otherwise 1 is returned, with the
   SIG/NOSIG categories updated according to whether the text began
   with the three BOM octets and whether any multi-octet sequence was
   seen.  */

static int
detect_coding_utf_8 (coding, detect_info)
     struct coding_system *coding;
     struct coding_detection_info *detect_info;
{
  const unsigned char *src = coding->source, *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  int consumed_chars = 0;
  int bom_found = 0;
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_UTF_8;
  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  while (1)
    {
      int c, c1, c2, c3, c4;

      /* Remember where this sequence starts; used for the BOM test
         and for the truncation test at no_more_source.  */
      src_base = src;
      ONE_MORE_BYTE (c);
      /* Negative values and one-octet codes need no further check.  */
      if (c < 0 || UTF_8_1_OCTET_P (c))
        continue;
      ONE_MORE_BYTE (c1);
      if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
        break;
      if (UTF_8_2_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c2);
      if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
        break;
      if (UTF_8_3_OCTET_LEADING_P (c))
        {
          found = 1;
          /* Only a sequence at the very start of the source can be a
             BOM (so never when head_ascii bytes were skipped).  */
          if (src_base == coding->source
              && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
            bom_found = 1;
          continue;
        }
      ONE_MORE_BYTE (c3);
      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
        break;
      if (UTF_8_4_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c4);
      if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
        break;
      if (UTF_8_5_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      /* The lead octet matches none of the accepted forms.  */
      break;
    }
  /* Reached only through a `break': the text is not valid UTF-8.  */
  detect_info->rejected |= CATEGORY_MASK_UTF_8;
  return 0;

  /* NOTE(review): no `goto' to this label is visible in the function;
     presumably ONE_MORE_BYTE jumps here when the source is exhausted
     -- confirm against the macro definition.  */
 no_more_source:
  /* A sequence left unfinished at the end of the last block is
     malformed.  */
  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8;
      return 0;
    }
  if (bom_found)
    {
      /* The first character 0xFEFF doesn't necessarily mean a BOM.  */
      detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
    }
  else
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
      if (found)
        detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
    }
  return 1;
}
1318
1319
1320 static void
1321 decode_coding_utf_8 (coding)
1322      struct coding_system *coding;
1323 {
1324   const unsigned char *src = coding->source + coding->consumed;
1325   const unsigned char *src_end = coding->source + coding->src_bytes;
1326   const unsigned char *src_base;
1327   int *charbuf = coding->charbuf + coding->charbuf_used;
1328   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1329   int consumed_chars = 0, consumed_chars_base = 0;
1330   int multibytep = coding->src_multibyte;
1331   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1332   Lisp_Object attr, charset_list;
1333   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1334   int byte_after_cr = -1;
1335
1336   CODING_GET_INFO (coding, attr, charset_list);
1337
1338   if (bom != utf_without_bom)
1339     {
1340       int c1, c2, c3;
1341
1342       src_base = src;
1343       ONE_MORE_BYTE (c1);
1344       if (! UTF_8_3_OCTET_LEADING_P (c1))
1345         src = src_base;
1346       else
1347         {
1348           ONE_MORE_BYTE (c2);
1349           if (! UTF_8_EXTRA_OCTET_P (c2))
1350             src = src_base;
1351           else
1352             {
1353               ONE_MORE_BYTE (c3);
1354               if (! UTF_8_EXTRA_OCTET_P (c3))
1355                 src = src_base;
1356               else
1357                 {
1358                   if ((c1 != UTF_8_BOM_1)
1359                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1360                     src = src_base;
1361                   else
1362                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1363                 }
1364             }
1365         }
1366     }
1367   CODING_UTF_8_BOM (coding) = utf_without_bom;
1368
1369
1370
1371   while (1)
1372     {
1373       int c, c1, c2, c3, c4, c5;
1374
1375       src_base = src;
1376       consumed_chars_base = consumed_chars;
1377
1378       if (charbuf >= charbuf_end)
1379         break;
1380
1381       if (byte_after_cr >= 0)
1382         c1 = byte_after_cr, byte_after_cr = -1;
1383       else
1384         ONE_MORE_BYTE (c1);
1385       if (c1 < 0)
1386         {
1387           c = - c1;
1388         }
1389       else if (UTF_8_1_OCTET_P(c1))
1390         {
1391           if (eol_crlf && c1 == '\r')
1392             ONE_MORE_BYTE (byte_after_cr);
1393           c = c1;
1394         }
1395       else
1396         {
1397           ONE_MORE_BYTE (c2);
1398           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1399             goto invalid_code;
1400           if (UTF_8_2_OCTET_LEADING_P (c1))
1401             {
1402               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1403               /* Reject overlong sequences here and below.  Encoders
1404                  producing them are incorrect, they can be misleading,
1405                  and they mess up read/write invariance.  */
1406               if (c < 128)
1407                 goto invalid_code;
1408             }
1409           else
1410             {
1411               ONE_MORE_BYTE (c3);
1412               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1413                 goto invalid_code;
1414               if (UTF_8_3_OCTET_LEADING_P (c1))
1415                 {
1416                   c = (((c1 & 0xF) << 12)
1417                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1418                   if (c < 0x800
1419                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1420                     goto invalid_code;
1421                 }
1422               else
1423                 {
1424                   ONE_MORE_BYTE (c4);
1425                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1426                     goto invalid_code;
1427                   if (UTF_8_4_OCTET_LEADING_P (c1))
1428                     {
1429                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1430                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1431                     if (c < 0x10000)
1432                       goto invalid_code;
1433                     }
1434                   else
1435                     {
1436                       ONE_MORE_BYTE (c5);
1437                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1438                         goto invalid_code;
1439                       if (UTF_8_5_OCTET_LEADING_P (c1))
1440                         {
1441                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1442                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1443                                | (c5 & 0x3F));
1444                           if ((c > MAX_CHAR) || (c < 0x200000))
1445                             goto invalid_code;
1446                         }
1447                       else
1448                         goto invalid_code;
1449                     }
1450                 }
1451             }
1452         }
1453
1454       *charbuf++ = c;
1455       continue;
1456
1457     invalid_code:
1458       src = src_base;
1459       consumed_chars = consumed_chars_base;
1460       ONE_MORE_BYTE (c);
1461       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1462       coding->errors++;
1463     }
1464
1465  no_more_source:
1466   coding->consumed_char += consumed_chars_base;
1467   coding->consumed = src_base - coding->source;
1468   coding->charbuf_used = charbuf - coding->charbuf;
1469 }
1470
1471
1472 static int
1473 encode_coding_utf_8 (coding)
1474      struct coding_system *coding;
1475 {
1476   int multibytep = coding->dst_multibyte;
1477   int *charbuf = coding->charbuf;
1478   int *charbuf_end = charbuf + coding->charbuf_used;
1479   unsigned char *dst = coding->destination + coding->produced;
1480   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1481   int produced_chars = 0;
1482   int c;
1483
1484   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1485     {
1486       ASSURE_DESTINATION (3);
1487       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1488       CODING_UTF_8_BOM (coding) = utf_without_bom;
1489     }
1490
1491   if (multibytep)
1492     {
1493       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1494
1495       while (charbuf < charbuf_end)
1496         {
1497           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1498
1499           ASSURE_DESTINATION (safe_room);
1500           c = *charbuf++;
1501           if (CHAR_BYTE8_P (c))
1502             {
1503               c = CHAR_TO_BYTE8 (c);
1504               EMIT_ONE_BYTE (c);
1505             }
1506           else
1507             {
1508               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1509               for (p = str; p < pend; p++)
1510                 EMIT_ONE_BYTE (*p);
1511             }
1512         }
1513     }
1514   else
1515     {
1516       int safe_room = MAX_MULTIBYTE_LENGTH;
1517
1518       while (charbuf < charbuf_end)
1519         {
1520           ASSURE_DESTINATION (safe_room);
1521           c = *charbuf++;
1522           if (CHAR_BYTE8_P (c))
1523             *dst++ = CHAR_TO_BYTE8 (c);
1524           else
1525             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1526           produced_chars++;
1527         }
1528     }
1529   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1530   coding->produced_char += produced_chars;
1531   coding->produced = dst - coding->destination;
1532   return 0;
1533 }
1534
1535
1536 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1537    Check if a text is encoded in one of UTF-16 based coding systems.
1538    If it is, return 1, else return 0.  */
1539
/* Nonzero if the 16-bit unit VAL lies in 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit unit VAL lies in 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)

/* Nonzero if VAL is 0xFFFE, 0xFFFF or a low surrogate.  VAL is
   evaluated more than once.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF)         \
   || UTF_16_LOW_SURROGATE_P (val))
1550
1551
     /* Check whether the source of CODING may be UTF-16 text and record
        the verdict in DETECT_INFO.

        CATEGORY_MASK_UTF_16 is always added to DETECT_INFO->checked.  A
        leading FF FE or FE FF byte pair marks the matching signatured
        category and UTF_16_AUTO as found and rejects the other
        endianness and both NOSIG categories.  Without such a pair the
        signatured categories are rejected and the bytes at even and odd
        offsets are sampled; as soon as either position has shown 128
        distinct values every UTF-16 category is rejected.

        Return 0 if all UTF-16 categories were rejected, otherwise 1.  */
1552 static int
1553 detect_coding_utf_16 (coding, detect_info)
1554      struct coding_system *coding;
1555      struct coding_detection_info *detect_info;
1556 {
1557   const unsigned char *src = coding->source, *src_base = src;
1558   const unsigned char *src_end = coding->source + coding->src_bytes;
1559   int multibytep = coding->src_multibyte;
1560   int consumed_chars = 0;
1561   int c1, c2;
1562
1563   detect_info->checked |= CATEGORY_MASK_UTF_16;
       /* A final block with an odd character count is rejected outright.  */
1564   if (coding->mode & CODING_MODE_LAST_BLOCK
1565       && (coding->src_chars & 1))
1566     {
1567       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1568       return 0;
1569     }
1570
       /* Look at the first two bytes for a byte order mark.  */
1571   ONE_MORE_BYTE (c1);
1572   ONE_MORE_BYTE (c2);
1573   if ((c1 == 0xFF) && (c2 == 0xFE))
1574     {
1575       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1576                              | CATEGORY_MASK_UTF_16_AUTO);
1577       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1578                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1579                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1580     }
1581   else if ((c1 == 0xFE) && (c2 == 0xFF))
1582     {
1583       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1584                              | CATEGORY_MASK_UTF_16_AUTO);
1585       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1586                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1587                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1588     }
1589   else
1590     {
1591       /* We check the dispersion of Eth and Oth bytes where E is even and
1592          O is odd.  If either is high, we assume binary data.*/
           /* E[b] / O[b] are set once byte value b has been seen at an
              even / odd offset; E_NUM / O_NUM count the distinct values.  */
1593       unsigned char e[256], o[256];
1594       unsigned e_num = 1, o_num = 1;
1595
           /* NOTE(review): elsewhere in this file the value produced by
              ONE_MORE_BYTE is tested for being negative.  If that can
              happen here, C1 and C2 would index E and O out of bounds --
              confirm.  */
1596       memset (e, 0, 256);
1597       memset (o, 0, 256);
1598       e[c1] = 1;
1599       o[c2] = 1;
1600
           /* No byte order mark, so the signatured categories are out.  */
1601       detect_info->rejected
1602         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1603
1604       while (1)
1605         {
1606           ONE_MORE_BYTE (c1);
1607           ONE_MORE_BYTE (c2);
1608           if (! e[c1])
1609             {
1610               e[c1] = 1;
1611               e_num++;
1612               if (e_num >= 128)
1613                 break;
1614             }
1615           if (! o[c2])
1616             {
                   /* O is tested with C2 above, so it must be marked with
                      C2 as well; otherwise the same odd byte would be
                      counted again every time it recurs.  */
1617               o[c2] = 1;
1618               o_num++;
1619               if (o_num >= 128)
1620                 break;
1621             }
1622         }
           /* Reached only through the `break's above.  */
1623       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1624       return 0;
1625     }
1626
       /* Presumably the point ONE_MORE_BYTE jumps to when the source runs
          out; nothing has rejected UTF-16 as a whole by then.  */
1627  no_more_source:
1628   return 1;
1629 }
1630
     /* Decode the UTF-16 source of CODING into CODING->charbuf.

        Reading starts at CODING->source + CODING->consumed and results
        are stored from CODING->charbuf + CODING->charbuf_used.  The byte
        order is taken from CODING_UTF_16_ENDIAN; a pending high surrogate
        is read from CODING_UTF_16_SURROGATE on entry and written back
        whenever it changes.  On exit CODING->consumed,
        CODING->consumed_char and CODING->charbuf_used are updated; the
        consumed counts stop at the start of the last loop iteration that
        was begun.  */
1631 static void
1632 decode_coding_utf_16 (coding)
1633      struct coding_system *coding;
1634 {
1635   const unsigned char *src = coding->source + coding->consumed;
1636   const unsigned char *src_end = coding->source + coding->src_bytes;
1637   const unsigned char *src_base;
1638   int *charbuf = coding->charbuf + coding->charbuf_used;
1639   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1640   int consumed_chars = 0, consumed_chars_base = 0;
1641   int multibytep = coding->src_multibyte;
1642   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1643   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1644   int surrogate = CODING_UTF_16_SURROGATE (coding);
1645   Lisp_Object attr, charset_list;
1646   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The two bytes read ahead after a CR, or -1 when there are none.  */
1647   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1648
1649   CODING_GET_INFO (coding, attr, charset_list);
1650
1651   if (bom == utf_with_bom)
1652     {
1653       int c, c1, c2;
1654
           /* A BOM is expected.  The two bytes are always combined high
              byte first, so the mark reads 0xFEFF for big endian and
              0xFFFE otherwise.  */
1655       src_base = src;
1656       ONE_MORE_BYTE (c1);
1657       ONE_MORE_BYTE (c2);
1658       c = (c1 << 8) | c2;
1659
1660       if (endian == utf_16_big_endian
1661           ? c != 0xFEFF : c != 0xFFFE)
1662         {
1663           /* The first two bytes are not BOM.  Treat them as bytes
1664              for a normal character.  */
1665           src = src_base;
1666           coding->errors++;
1667         }
           /* Either way, do not look for a BOM again.  */
1668       CODING_UTF_16_BOM (coding) = utf_without_bom;
1669     }
1670   else if (bom == utf_detect_bom)
1671     {
1672       /* We have already tried to detect BOM and failed in
1673          detect_coding.  */
1674       CODING_UTF_16_BOM (coding) = utf_without_bom;
1675     }
1676
1677   while (1)
1678     {
1679       int c, c1, c2;
1680
           /* Remember where this unit starts; the exit code below reports
              consumption up to this point only.  */
1681       src_base = src;
1682       consumed_chars_base = consumed_chars;
1683
           /* One iteration may store up to three entries.  */
1684       if (charbuf + 2 >= charbuf_end)
1685         break;
1686
           /* Prefer a byte that was read ahead after a CR.  */
1687       if (byte_after_cr1 >= 0)
1688         c1 = byte_after_cr1, byte_after_cr1 = -1;
1689       else
1690         ONE_MORE_BYTE (c1);
           /* A negative value is stored negated, without pairing it with
              a second byte.  */
1691       if (c1 < 0)
1692         {
1693           *charbuf++ = -c1;
1694           continue;
1695         }
1696       if (byte_after_cr2 >= 0)
1697         c2 = byte_after_cr2, byte_after_cr2 = -1;
1698       else
1699         ONE_MORE_BYTE (c2);
           /* C1 cannot be paired: store it alone (through BYTE8_TO_CHAR
              unless it is ASCII), then C2 negated.  */
1700       if (c2 < 0)
1701         {
1702           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1703           *charbuf++ = -c2;
1704           continue;
1705         }
           /* Assemble the 16-bit unit according to ENDIAN.  */
1706       c = (endian == utf_16_big_endian
1707            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1708
1709       if (surrogate)
1710         {
               /* A high surrogate is pending.  */
1711           if (! UTF_16_LOW_SURROGATE_P (c))
1712             {
                   /* It is not followed by a low surrogate: store its two
                      bytes in source order and count an error.  */
1713               if (endian == utf_16_big_endian)
1714                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1715               else
1716                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1717               *charbuf++ = c1;
1718               *charbuf++ = c2;
1719               coding->errors++;
                   /* NOTE(review): when C is not a high surrogate,
                      `surrogate' keeps its old nonzero value, so the next
                      unit is again handled as if a high surrogate were
                      pending -- confirm that this is intended.  */
1720               if (UTF_16_HIGH_SURROGATE_P (c))
1721                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1722               else
1723                 *charbuf++ = c;
1724             }
1725           else
1726             {
                   /* Combine the pair into one value at or above 0x10000
                      and clear the pending state.  */
1727               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1728               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1729               *charbuf++ = 0x10000 + c;
1730             }
1731         }
1732       else
1733         {
               /* Hold a high surrogate until the next unit arrives.  */
1734           if (UTF_16_HIGH_SURROGATE_P (c))
1735             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1736           else
1737             {
                   /* With DOS end-of-line, read the two bytes after a CR
                      ahead before storing it.  Presumably this keeps a CR
                      at the end of the available source unconsumed, since
                      ONE_MORE_BYTE would leave with SRC_BASE still at the
                      CR -- confirm against the macro.  */
1738               if (eol_crlf && c == '\r')
1739                 {
1740                   ONE_MORE_BYTE (byte_after_cr1);
1741                   ONE_MORE_BYTE (byte_after_cr2);
1742                 }
1743               *charbuf++ = c;
1744             }
1745         }
1746     }
1747
1748  no_more_source:
1749   coding->consumed_char += consumed_chars_base;
1750   coding->consumed = src_base - coding->source;
1751   coding->charbuf_used = charbuf - coding->charbuf;
1752 }
1753
     /* Encode the characters in CODING->charbuf as UTF-16 into the
        destination of CODING, starting at CODING->destination +
        CODING->produced.

        Unless CODING_UTF_16_BOM is utf_without_bom, a byte order mark
        (FE FF for big endian, FF FE otherwise) is emitted first and the
        setting is then changed to utf_without_bom.  A character above
        MAX_UNICODE_CHAR is replaced by CODING->default_char.  Characters
        below 0x10000 produce one 16-bit unit, all others a surrogate
        pair.  CODING->produced and CODING->produced_char are updated.

        Return 0 on the path visible here.  */
1754 static int
1755 encode_coding_utf_16 (coding)
1756      struct coding_system *coding;
1757 {
1758   int multibytep = coding->dst_multibyte;
1759   int *charbuf = coding->charbuf;
1760   int *charbuf_end = charbuf + coding->charbuf_used;
1761   unsigned char *dst = coding->destination + coding->produced;
1762   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1763   int safe_room = 8;
1764   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1765   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1766   int produced_chars = 0;
1767   Lisp_Object attrs, charset_list;
1768   int c;
1769
1770   CODING_GET_INFO (coding, attrs, charset_list);
1771
1772   if (bom != utf_without_bom)
1773     {
1774       ASSURE_DESTINATION (safe_room);
1775       if (big_endian)
1776         EMIT_TWO_BYTES (0xFE, 0xFF);
1777       else
1778         EMIT_TWO_BYTES (0xFF, 0xFE);
           /* The mark is written only once per coding object.  */
1779       CODING_UTF_16_BOM (coding) = utf_without_bom;
1780     }
1781
1782   while (charbuf < charbuf_end)
1783     {
1784       ASSURE_DESTINATION (safe_room);
1785       c = *charbuf++;
           /* MAX_UNICODE_CHAR itself can still be written as a surrogate
              pair, so only larger values fall back to the default
              character.  */
1786       if (c > MAX_UNICODE_CHAR)
1787         c = coding->default_char;
1788
1789       if (c < 0x10000)
1790         {
1791           if (big_endian)
1792             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1793           else
1794             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1795         }
1796       else
1797         {
1798           int c1, c2;
1799
               /* Split the 20-bit offset from 0x10000 into a high
                  surrogate C1 and a low surrogate C2.  */
1800           c -= 0x10000;
1801           c1 = (c >> 10) + 0xD800;
1802           c2 = (c & 0x3FF) + 0xDC00;
1803           if (big_endian)
1804             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1805           else
1806             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1807         }
1808     }
1809   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1810   coding->produced = dst - coding->destination;
1811   coding->produced_char += produced_chars;
1812   return 0;
1813 }
1814
1815 \f
1816 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1817
1818 /* Emacs' internal format for representation of multiple character
1819    sets is a kind of multi-byte encoding, i.e. characters are
1820    represented by variable-length sequences of one-byte codes.
1821
1822    ASCII characters and control characters (e.g. `tab', `newline') are
1823    represented by one-byte sequences which are their ASCII codes, in
1824    the range 0x00 through 0x7F.
1825
1826    8-bit characters of the range 0x80..0x9F are represented by
1827    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1828    code + 0x20).
1829
1830    8-bit characters of the range 0xA0..0xFF are represented by
1831    one-byte sequences which are their 8-bit code.
1832
1833    The other characters are represented by a sequence of `base
1834    leading-code', optional `extended leading-code', and one or two
1835    `position-code's.  The length of the sequence is determined by the
1836    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1837    whereas extended leading-code and position-code take the range 0xA0
1838    through 0xFF.  See `charset.h' for more details about leading-code
1839    and position-code.
1840
1841    --- CODE RANGE of Emacs' internal format ---
1842    character set        range
1843    -------------        -----
1844    ascii                0x00..0x7F
1845    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1846    eight-bit-graphic    0xA0..0xBF
1847    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1848    ---------------------------------------------
1849
1850    As this is the internal character representation, the format is
1851    usually not used externally (i.e. in a file or in a data sent to a
1852    process).  But, it is possible to have a text externally in this
1853    format (i.e. by encoding by the coding system `emacs-mule').
1854
1855    In that case, a sequence of one-byte codes has a slightly different
1856    form.
1857
1858    At first, all characters in eight-bit-control are represented by
1859    one-byte sequences which are their 8-bit code.
1860
1861    Next, character composition data are represented by the byte
1862    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1863    where,
1864         METHOD is 0xF0 plus one of composition method (enum
1865         composition_method),
1866
1867         BYTES is 0xA0 plus a byte length of this composition data,
1868
1869         CHARS is 0x20 plus a number of characters composed by this
1870         data,
1871
1872         COMPONENTs are characters of multibyte form or composition
1873         rules encoded by two-byte of ASCII codes.
1874
1875    In addition, for backward compatibility, the following formats are
1876    also recognized as composition data on decoding.
1877
1878    0x80 MSEQ ...
1879    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1880
1881    Here,
1882         MSEQ is a multibyte form but in this special format:
1883           ASCII: 0xA0 ASCII_CODE+0x80,
1884           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1885         RULE is a one byte code of the range 0xA0..0xF0 that
1886         represents a composition rule.
1887   */
1888
     /* Table indexed by a byte value.  The code below looks up a leading
        byte in it to obtain the total number of bytes (1 to 4) of the
        emacs-mule sequence which that byte starts.  */
1889 char emacs_mule_bytes[256];
1890
     /* Decode the single emacs-mule character that starts at SRC, which
        points into the source of CODING.

        On success return the character, store the number of source bytes
        used in *NBYTES and the matching consumed_chars count in *NCHARS,
        and, if ID is not null, store the id of the character's charset
        in *ID.  Return -1 for an invalid byte sequence and -2 when the
        label `no_more_source' is reached; *NBYTES, *NCHARS and *ID are
        not stored in those cases.  */
1891 int
1892 emacs_mule_char (coding, src, nbytes, nchars, id)
1893      struct coding_system *coding;
1894      const unsigned char *src;
1895      int *nbytes, *nchars, *id;
1896 {
1897   const unsigned char *src_end = coding->source + coding->src_bytes;
1898   const unsigned char *src_base = src;
1899   int multibytep = coding->src_multibyte;
1900   struct charset *charset;
1901   unsigned code;
1902   int c;
1903   int consumed_chars = 0;
1904
1905   ONE_MORE_BYTE (c);
1906   if (c < 0)
1907     {
           /* A negative value is returned negated, with the charset taken
              from entry 0 of emacs_mule_charset.  */
1908       c = -c;
1909       charset = emacs_mule_charset[0];
1910     }
1911   else
1912     {
1913       if (c >= 0xA0)
1914         {
1915           /* Old style component character of a composition.  */
1916           if (c == 0xA0)
1917             {
1918               ONE_MORE_BYTE (c);
1919               c -= 0x80;
                   /* The byte after 0xA0 must be 0x80 or above; anything
                      else would make C a negative index into
                      emacs_mule_bytes below.  */
                   if (c < 0)
                     goto invalid_code;
1920             }
1921           else
1922             c -= 0x20;
1923         }
1924
           /* Dispatch on the total length of the sequence led by C.  */
1925       switch (emacs_mule_bytes[c])
1926         {
1927         case 2:
               /* Leading code and one position code.  */
1928           if (! (charset = emacs_mule_charset[c]))
1929             goto invalid_code;
1930           ONE_MORE_BYTE (c);
1931           if (c < 0xA0)
1932             goto invalid_code;
1933           code = c & 0x7F;
1934           break;
1935
1936         case 3:
1937           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1938               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1939             {
                   /* Private leading code, a second byte that selects the
                      charset, and one position code.  */
1940               ONE_MORE_BYTE (c);
1941               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1942                 goto invalid_code;
1943               ONE_MORE_BYTE (c);
1944               if (c < 0xA0)
1945                 goto invalid_code;
1946               code = c & 0x7F;
1947             }
1948           else
1949             {
                   /* Leading code and two position codes.  */
1950               if (! (charset = emacs_mule_charset[c]))
1951                 goto invalid_code;
1952               ONE_MORE_BYTE (c);
1953               if (c < 0xA0)
1954                 goto invalid_code;
1955               code = (c & 0x7F) << 8;
1956               ONE_MORE_BYTE (c);
1957               if (c < 0xA0)
1958                 goto invalid_code;
1959               code |= c & 0x7F;
1960             }
1961           break;
1962
1963         case 4:
               /* Leading code, a second byte that selects the charset,
                  and two position codes.  */
1964           ONE_MORE_BYTE (c);
1965           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1966             goto invalid_code;
1967           ONE_MORE_BYTE (c);
1968           if (c < 0xA0)
1969             goto invalid_code;
1970           code = (c & 0x7F) << 8;
1971           ONE_MORE_BYTE (c);
1972           if (c < 0xA0)
1973             goto invalid_code;
1974           code |= c & 0x7F;
1975           break;
1976
1977         case 1:
               /* A single byte: ASCII or eight-bit.  */
1978           code = c;
1979           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1980                                      ? charset_ascii : charset_eight_bit);
1981           break;
1982
1983         default:
1984           abort ();
1985         }
1986       c = DECODE_CHAR (charset, code);
1987       if (c < 0)
1988         goto invalid_code;
1989     }
1990   *nbytes = src - src_base;
1991   *nchars = consumed_chars;
1992   if (id)
1993     *id = charset->id;
1994   return c;
1995
1996  no_more_source:
1997   return -2;
1998
1999  invalid_code:
2000   return -1;
2001 }
2002
2003
2004 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2005    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
2006    else return 0.

        CATEGORY_MASK_EMACS_MULE is always added to DETECT_INFO->checked.
        The category is added to DETECT_INFO->rejected whenever 0 is
        returned, and to DETECT_INFO->found when 1 is returned after at
        least one multibyte sequence or composition was recognized.  */
2007
2008 static int
2009 detect_coding_emacs_mule (coding, detect_info)
2010      struct coding_system *coding;
2011      struct coding_detection_info *detect_info;
2012 {
2013   const unsigned char *src = coding->source, *src_base;
2014   const unsigned char *src_end = coding->source + coding->src_bytes;
2015   int multibytep = coding->src_multibyte;
2016   int consumed_chars = 0;
2017   int c;
2018   int found = 0;
2019
2020   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
2021   /* A coding system of this category is always ASCII compatible.  */
2022   src += coding->head_ascii;
2023
2024   while (1)
2025     {
2026       src_base = src;
2027       ONE_MORE_BYTE (c);
2028       if (c < 0)
2029         continue;
2030       if (c == 0x80)
2031         {
2032           /* Perhaps the start of composite character.  We simply skip
2033              it because analyzing it is too heavy for detecting.  But,
2034              at least, we check that the composite character
2035              constitutes of more than 4 bytes.  */
               /* This inner SRC_BASE shadows the outer one, which still
                  points at the 0x80 byte.  */
2036           const unsigned char *src_base;
2037
2038         repeat:
2039           src_base = src;
2040           do
2041             {
2042               ONE_MORE_BYTE (c);
2043             }
2044           while (c >= 0xA0);
2045
2046           if (src - src_base <= 4)
2047             break;
2048           found = CATEGORY_MASK_EMACS_MULE;
2049           if (c == 0x80)
2050             goto repeat;
2051         }
2052
           /* C is now the byte that starts the next sequence: either the
              one read at the top of the loop or the one that ended a
              composition.  */
2053       if (c < 0x80)
2054         {
               /* ESC, SI and SO rule emacs-mule out.  */
2055           if (c < 0x20
2056               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2057             break;
2058         }
2059       else
2060         {
               /* Look up the length for C itself.  The outer SRC_BASE
                  cannot be used for this: after a composition it still
                  points at the 0x80 marker, not at the leading byte
                  held in C.  */
2061           int more_bytes = emacs_mule_bytes[c] - 1;
2062
               /* Every following byte must be 0xA0 or above.  */
2063           while (more_bytes > 0)
2064             {
2065               ONE_MORE_BYTE (c);
2066               if (c < 0xA0)
2067                 {
2068                   src--;        /* Unread the last byte.  */
2069                   break;
2070                 }
2071               more_bytes--;
2072             }
2073           if (more_bytes != 0)
2074             break;
2075           found = CATEGORY_MASK_EMACS_MULE;
2076         }
2077     }
       /* Reached only through the `break's above.  */
2078   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2079   return 0;
2080
2081  no_more_source:
       /* Bytes of an unfinished sequence at the end of the last block
          also mean rejection.  */
2082   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2083     {
2084       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2085       return 0;
2086     }
2087   detect_info->found |= found;
2088   return 1;
2089 }
2090
2091
2092 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2093
2094 /* Decode, with emacs_mule_char, one character that is a component of
2095    a composition sequence of Emacs 20/21 style at SRC.  Store it in
2096    *BUF, increment BUF, and advance SRC and consumed_chars past it.
2097    Do nothing if SRC is at SRC_END or emacs_mule_char returns -2.  On
2098    any other negative result (an invalid byte sequence), jump to the
2099    label `invalid_code'.  */
2100
2101 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
2102   do                                                            \
2103     {                                                           \
2104       int c;                                                    \
2105       int nbytes, nchars;                                       \
2106                                                                 \
2107       if (src == src_end)                                       \
2108         break;                                                  \
2109       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
2110       if (c < 0)                                                \
2111         {                                                       \
2112           if (c == -2)                                          \
2113             break;                                              \
2114           goto invalid_code;                                    \
2115         }                                                       \
2116       *buf++ = c;                                               \
2117       src += nbytes;                                            \
2118       consumed_chars += nchars;                                 \
2119     }                                                           \
2120   while (0)
2121
2122
2123 /* Decode a composition rule represented as a component of composition
2124    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
2125    and increment BUF.  The rule is one byte; after subtracting 0xA0 its
2126    value V gives GREF = V / 9 and NREF = V % 9.  If SRC is at or past
        SRC_END, or V is outside 0..80, jump to the label `invalid_code'.  */
2127
2128 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
2129   do {                                                  \
2130     int c, gref, nref;                                  \
2131                                                         \
2132     if (src >= src_end)                                 \
2133       goto invalid_code;                                \
2134     ONE_MORE_BYTE_NO_CHECK (c);                         \
2135     c -= 0xA0;                                          \
2136     if (c < 0 || c >= 81)                               \
2137       goto invalid_code;                                \
2138                                                         \
2139     gref = c / 9, nref = c % 9;                         \
2140     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2141   } while (0)
2142
2143
2144 /* Decode a composition rule represented as a component of composition
2145    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
2146    and increment BUF.  The rule is two bytes, GREF then NREF, each with
2147    0x20 added.  If fewer than two bytes remain before SRC_END, or either
        value is outside 0..80, jump to the label `invalid_code'.  */
2148
2149 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
2150   do {                                                  \
2151     int gref, nref;                                     \
2152                                                         \
2153     if (src + 1>= src_end)                              \
2154       goto invalid_code;                                \
2155     ONE_MORE_BYTE_NO_CHECK (gref);                      \
2156     gref -= 0x20;                                       \
2157     ONE_MORE_BYTE_NO_CHECK (nref);                      \
2158     nref -= 0x20;                                       \
2159     if (gref < 0 || gref >= 81                          \
2160         || nref < 0 || nref >= 81)                      \
2161       goto invalid_code;                                \
2162     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2163   } while (0)
2164
2165
     /* Decode a composition of Emacs 21 style whose METHOD byte has just
        been read into C.  Read the BYTES and CHARS bytes, record the
        composition with ADD_COMPOSITION_DATA and, unless the method is
        COMPOSITION_RELATIVE, decode components until consumed_chars
        reaches consumed_chars_base + BYTES; the number of components is
        then subtracted from the first entry that was at CHARBUF on
        entry.  Jump to the label `invalid_code' if a length byte is
        negative, if BYTES is below 3, or if a component is invalid.

        NOTE(review): DECODE_EMACS_MULE_COMPOSITION_CHAR can finish
        without consuming anything (source exhausted); the `while' loop
        below then makes no progress on that iteration -- confirm that it
        always terminates, in particular for COMPOSITION_WITH_ALTCHARS,
        where no rule is ever decoded.  */
2166 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
2167   do {                                                                  \
2168     /* Emacs 21 style format.  The first three bytes at SRC are         \
2169        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
2170        the byte length of this composition information, CHARS is the    \
2171        number of characters composed by this composition.  */           \
2172     enum composition_method method = c - 0xF2;                          \
2173     int *charbuf_base = charbuf;                                        \
2174     int consumed_chars_limit;                                           \
2175     int nbytes, nchars;                                                 \
2176                                                                         \
2177     ONE_MORE_BYTE (c);                                                  \
2178     if (c < 0)                                                          \
2179       goto invalid_code;                                                \
2180     nbytes = c - 0xA0;                                                  \
2181     if (nbytes < 3)                                                     \
2182       goto invalid_code;                                                \
2183     ONE_MORE_BYTE (c);                                                  \
2184     if (c < 0)                                                          \
2185       goto invalid_code;                                                \
2186     nchars = c - 0xA0;                                                  \
2187     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2188     consumed_chars_limit = consumed_chars_base + nbytes;                \
2189     if (method != COMPOSITION_RELATIVE)                                 \
2190       {                                                                 \
2191         int i = 0;                                                      \
2192         while (consumed_chars < consumed_chars_limit)                   \
2193           {                                                             \
2194             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
2195               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
2196             else                                                        \
2197               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
2198             i++;                                                        \
2199           }                                                             \
2200         if (consumed_chars < consumed_chars_limit)                      \
2201           goto invalid_code;                                            \
2202         charbuf_base[0] -= i;                                           \
2203       }                                                                 \
2204   } while (0)
2205
2206
     /* Decode a relative composition of Emacs 20 style.  Rewind SRC to
        SRC_BASE, skip the 0x80 byte, then decode characters into a local
        array while the byte at SRC is 0xA0 or above, for at most
        MAX_COMPOSITION_COMPONENTS rounds.  Fewer than two rounds is an
        error (jump to the label `invalid_code').  Otherwise record the
        composition with ADD_COMPOSITION_DATA and append the collected
        characters to CHARBUF.

        NOTE(review): the loop condition reads *SRC without a visible
        check against SRC_END, and a round in which
        DECODE_EMACS_MULE_COMPOSITION_CHAR stores nothing is still
        counted -- confirm both are harmless.  */
2207 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
2208   do {                                                                  \
2209     /* Emacs 20 style format for relative composition.  */              \
2210     /* Store multibyte form of characters to be composed.  */           \
2211     enum composition_method method = COMPOSITION_RELATIVE;              \
2212     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
2213     int *buf = components;                                              \
2214     int i, j;                                                           \
2215                                                                         \
2216     src = src_base;                                                     \
2217     ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
2218     for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++)    \
2219       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
2220     if (i < 2)                                                          \
2221       goto invalid_code;                                                \
2222     ADD_COMPOSITION_DATA (charbuf, i, method);                          \
2223     for (j = 0; j < i; j++)                                             \
2224       *charbuf++ = components[j];                                       \
2225   } while (0)
2226
2227
     /* Decode a rule-base composition of Emacs 20 style: one character
        followed by (rule, character) pairs for as long as the byte at
        SRC is 0xA0 or above, with at most MAX_COMPOSITION_COMPONENTS
        characters in total.  Jump to the label `invalid_code' if no pair
        was read or the number of collected components is even, and to
        `no_more_source' if CHARBUF lacks room.  Otherwise record the
        composition with ADD_COMPOSITION_DATA, append all components
        (characters and rules), subtract their count from the first entry
        that was at CHARBUF on entry, and finally append the characters
        alone (every second component).  */
2228 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
2229   do {                                                          \
2230     /* Emacs 20 style format for rule-base composition.  */     \
2231     /* Store multibyte form of characters to be composed.  */   \
2232     enum composition_method method = COMPOSITION_WITH_RULE;     \
2233     int *charbuf_base = charbuf;                                \
2234     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
2235     int *buf = components;                                      \
2236     int i, j;                                                   \
2237                                                                 \
2238     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
2239     for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2240       {                                                         \
2241         if (*src < 0xA0)                                        \
2242           break;                                                \
2243         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
2244         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
2245       }                                                         \
2246     if (i <= 1 || (buf - components) % 2 == 0)                  \
2247       goto invalid_code;                                        \
2248     if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
2249       goto no_more_source;                                      \
2250     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
2251     i = i * 2 - 1;                                              \
2252     for (j = 0; j < i; j++)                                     \
2253       *charbuf++ = components[j];                               \
2254     charbuf_base[0] -= i;                                       \
2255     for (j = 0; j < i; j += 2)                                  \
2256       *charbuf++ = components[j];                               \
2257   } while (0)
2258
2259
     /* Decode the emacs-mule source of CODING into CODING->charbuf.

        Reading starts at CODING->source + CODING->consumed and results
        are stored from CODING->charbuf + CODING->charbuf_used;
        MAX_ANNOTATION_LENGTH entries at the end of the buffer are kept
        in reserve.  A unit that cannot be decoded has its first byte
        stored alone (as itself if ASCII, otherwise through
        BYTE8_TO_CHAR) and is counted in CODING->errors.  On exit
        CODING->consumed, CODING->consumed_char and CODING->charbuf_used
        are updated; the consumed counts stop at the start of the last
        loop iteration that was begun.  */
2260 static void
2261 decode_coding_emacs_mule (coding)
2262      struct coding_system *coding;
2263 {
2264   const unsigned char *src = coding->source + coding->consumed;
2265   const unsigned char *src_end = coding->source + coding->src_bytes;
2266   const unsigned char *src_base;
2267   int *charbuf = coding->charbuf + coding->charbuf_used;
2268   int *charbuf_end
2269     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2270   int consumed_chars = 0, consumed_chars_base;
2271   int multibytep = coding->src_multibyte;
2272   Lisp_Object attrs, charset_list;
       /* CHAR_OFFSET counts the characters stored so far.  LAST_ID is the
          charset id of the current run of multibyte characters and
          LAST_OFFSET the value of CHAR_OFFSET where that run began.  */
2273   int char_offset = coding->produced_char;
2274   int last_offset = char_offset;
2275   int last_id = charset_ascii;
2276   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The byte read ahead after a CR, or -1 when there is none.  */
2277   int byte_after_cr = -1;
2278
2279   CODING_GET_INFO (coding, attrs, charset_list);
2280
2281   while (1)
2282     {
2283       int c;
2284
           /* Remember where this unit starts; `invalid_code' rewinds to
              it and the exit code reports consumption up to it.  */
2285       src_base = src;
2286       consumed_chars_base = consumed_chars;
2287
2288       if (charbuf >= charbuf_end)
2289         break;
2290
           /* Prefer a byte that was read ahead after a CR.  */
2291       if (byte_after_cr >= 0)
2292         c = byte_after_cr, byte_after_cr = -1;
2293       else
2294         ONE_MORE_BYTE (c);
2295       if (c < 0)
2296         {
               /* A negative value is stored negated.  */
2297           *charbuf++ = -c;
2298           char_offset++;
2299         }
2300       else if (c < 0x80)
2301         {
               /* With DOS end-of-line, read the byte after a CR ahead
                  before storing the CR.  */
2302           if (eol_crlf && c == '\r')
2303             ONE_MORE_BYTE (byte_after_cr);
2304           *charbuf++ = c;
2305           char_offset++;
2306         }
2307       else if (c == 0x80)
2308         {
               /* 0x80 introduces composition data; the next byte selects
                  one of the three formats handled below.  */
2309           ONE_MORE_BYTE (c);
2310           if (c < 0)
2311             goto invalid_code;
2312           if (c - 0xF2 >= COMPOSITION_RELATIVE
2313               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2314             DECODE_EMACS_MULE_21_COMPOSITION (c);
2315           else if (c < 0xC0)
2316             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2317           else if (c == 0xFF)
2318             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2319           else
2320             goto invalid_code;
2321         }
2322       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2323         {
2324           int nbytes, nchars;
2325           int id;
2326
               /* C leads a multibyte sequence: rewind to it and let
                  emacs_mule_char decode the whole sequence.  */
2327           src = src_base;
2328           consumed_chars = consumed_chars_base;
2329           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2330           if (c < 0)
2331             {
                   /* -2: the source ended inside the sequence.  */
2332               if (c == -2)
2333                 break;
2334               goto invalid_code;
2335             }
               /* The charset changed: close the previous run, unless it
                  was ASCII, and start a new one here.  */
2336           if (last_id != id)
2337             {
2338               if (last_id != charset_ascii)
2339                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2340               last_id = id;
2341               last_offset = char_offset;
2342             }
2343           *charbuf++ = c;
2344           src += nbytes;
2345           consumed_chars += nchars;
2346           char_offset++;
2347         }
2348       else
2349         goto invalid_code;
2350       continue;
2351
2352     invalid_code:
           /* Rewind to the start of the unit and store its first byte on
              its own, counting one error.  */
2353       src = src_base;
2354       consumed_chars = consumed_chars_base;
2355       ONE_MORE_BYTE (c);
2356       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2357       char_offset++;
2358       coding->errors++;
2359     }
2360
2361  no_more_source:
       /* Close the charset run that is still open, if any.  */
2362   if (last_id != charset_ascii)
2363     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2364   coding->consumed_char += consumed_chars_base;
2365   coding->consumed = src_base - coding->source;
2366   coding->charbuf_used = charbuf - coding->charbuf;
2367 }
2368
2369
/* Store in CODES (an array of at least two bytes) the leading code(s)
   that emacs-mule uses for the charset whose emacs-mule id is ID.

   An ID below 0xA0 is itself the only leading code; CODES[1] is then
   set to 0, which callers test to learn that there is no second
   leading code.  For any other ID, CODES[0] is one of the bytes
   0x9A..0x9D, chosen by the range ID falls in, and CODES[1] is ID.

   Both arguments are parenthesized, so arbitrary expressions may be
   passed, but ID is evaluated more than once and must not have side
   effects.  The expansion is a single statement with no trailing
   semicolon; write the semicolon at the point of use.  */

#define EMACS_MULE_LEADING_CODES(id, codes)     \
  do {                                          \
    if ((id) < 0xA0)                            \
      (codes)[0] = (id), (codes)[1] = 0;        \
    else if ((id) < 0xE0)                       \
      (codes)[0] = 0x9A, (codes)[1] = (id);     \
    else if ((id) < 0xF0)                       \
      (codes)[0] = 0x9B, (codes)[1] = (id);     \
    else if ((id) < 0xF5)                       \
      (codes)[0] = 0x9C, (codes)[1] = (id);     \
    else                                        \
      (codes)[0] = 0x9D, (codes)[1] = (id);     \
  } while (0)
2383
2384
2385 static int
2386 encode_coding_emacs_mule (coding)
2387      struct coding_system *coding;
2388 {
2389   int multibytep = coding->dst_multibyte;
2390   int *charbuf = coding->charbuf;
2391   int *charbuf_end = charbuf + coding->charbuf_used;
2392   unsigned char *dst = coding->destination + coding->produced;
2393   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2394   int safe_room = 8;
2395   int produced_chars = 0;
2396   Lisp_Object attrs, charset_list;
2397   int c;
2398   int preferred_charset_id = -1;
2399
2400   CODING_GET_INFO (coding, attrs, charset_list);
2401   if (! EQ (charset_list, Vemacs_mule_charset_list))
2402     {
2403       CODING_ATTR_CHARSET_LIST (attrs)
2404         = charset_list = Vemacs_mule_charset_list;
2405     }
2406
2407   while (charbuf < charbuf_end)
2408     {
2409       ASSURE_DESTINATION (safe_room);
2410       c = *charbuf++;
2411
2412       if (c < 0)
2413         {
2414           /* Handle an annotation.  */
2415           switch (*charbuf)
2416             {
2417             case CODING_ANNOTATE_COMPOSITION_MASK:
2418               /* Not yet implemented.  */
2419               break;
2420             case CODING_ANNOTATE_CHARSET_MASK:
2421               preferred_charset_id = charbuf[3];
2422               if (preferred_charset_id >= 0
2423                   && NILP (Fmemq (make_number (preferred_charset_id),
2424                                   charset_list)))
2425                 preferred_charset_id = -1;
2426               break;
2427             default:
2428               abort ();
2429             }
2430           charbuf += -c - 1;
2431           continue;
2432         }
2433
2434       if (ASCII_CHAR_P (c))
2435         EMIT_ONE_ASCII_BYTE (c);
2436       else if (CHAR_BYTE8_P (c))
2437         {
2438           c = CHAR_TO_BYTE8 (c);
2439           EMIT_ONE_BYTE (c);
2440         }
2441       else
2442         {
2443           struct charset *charset;
2444           unsigned code;
2445           int dimension;
2446           int emacs_mule_id;
2447           unsigned char leading_codes[2];
2448
2449           if (preferred_charset_id >= 0)
2450             {
2451               charset = CHARSET_FROM_ID (preferred_charset_id);
2452               if (CHAR_CHARSET_P (c, charset))
2453                 code = ENCODE_CHAR (charset, c);
2454               else
2455                 charset = char_charset (c, charset_list, &code);
2456             }
2457           else
2458             charset = char_charset (c, charset_list, &code);
2459           if (! charset)
2460             {
2461               c = coding->default_char;
2462               if (ASCII_CHAR_P (c))
2463                 {
2464                   EMIT_ONE_ASCII_BYTE (c);
2465                   continue;
2466                 }
2467               charset = char_charset (c, charset_list, &code);
2468             }
2469           dimension = CHARSET_DIMENSION (charset);
2470           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2471           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2472           EMIT_ONE_BYTE (leading_codes[0]);
2473           if (leading_codes[1])
2474             EMIT_ONE_BYTE (leading_codes[1]);
2475           if (dimension == 1)
2476             EMIT_ONE_BYTE (code | 0x80);
2477           else
2478             {
2479               code |= 0x8080;
2480               EMIT_ONE_BYTE (code >> 8);
2481               EMIT_ONE_BYTE (code & 0xFF);
2482             }
2483         }
2484     }
2485   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2486   coding->produced_char += produced_chars;
2487   coding->produced = dst - coding->destination;
2488   return 0;
2489 }
2490
2491 \f
2492 /*** 7. ISO2022 handlers ***/
2493
2494 /* The following note describes the coding system ISO2022 briefly.
2495    Since the intention of this note is to help understand the
2496    functions in this file, some parts are NOT ACCURATE or are OVERLY
2497    SIMPLIFIED.  For thorough understanding, please refer to the
2498    original document of ISO2022.  This is equivalent to the standard
2499    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2500
2501    ISO2022 provides many mechanisms to encode several character sets
2502    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2503    is encoded using bytes less than 128.  This may make the encoded
2504    text a little bit longer, but the text passes more easily through
2505    several types of gateway, some of which strip off the MSB (Most
2506    Significant Bit).
2507
2508    There are two kinds of character sets: control character sets and
2509    graphic character sets.  The former contain control characters such
2510    as `newline' and `escape' to provide control functions (control
2511    functions are also provided by escape sequences).  The latter
2512    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2513    two control character sets and many graphic character sets.
2514
2515    Graphic character sets are classified into one of the following
2516    four classes, according to the number of bytes (DIMENSION) and
2517    number of characters in one dimension (CHARS) of the set:
2518    - DIMENSION1_CHARS94
2519    - DIMENSION1_CHARS96
2520    - DIMENSION2_CHARS94
2521    - DIMENSION2_CHARS96
2522
2523    In addition, each character set is assigned an identification tag,
2524    unique for each set, called the "final character" (denoted as <F>
2525    hereafter).  The <F> of each character set is decided by ECMA(*)
2526    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2527    (0x30..0x3F are for private use only).
2528
2529    Note (*): ECMA = European Computer Manufacturers Association
2530
2531    Here are examples of graphic character sets [NAME(<F>)]:
2532         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2533         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2534         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2535         o DIMENSION2_CHARS96 -- none for the moment
2536
2537    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2538         C0 [0x00..0x1F] -- control character plane 0
2539         GL [0x20..0x7F] -- graphic character plane 0
2540         C1 [0x80..0x9F] -- control character plane 1
2541         GR [0xA0..0xFF] -- graphic character plane 1
2542
2543    A control character set is directly designated and invoked to C0 or
2544    C1 by an escape sequence.  The most common case is that:
2545    - ISO646's  control character set is designated/invoked to C0, and
2546    - ISO6429's control character set is designated/invoked to C1,
2547    and usually these designations/invocations are omitted in encoded
2548    text.  In a 7-bit environment, only C0 can be used, and a control
2549    character for C1 is encoded by an appropriate escape sequence to
2550    fit into the environment.  All control characters for C1 are
2551    defined to have corresponding escape sequences.
2552
2553    A graphic character set is at first designated to one of four
2554    graphic registers (G0 through G3), then these graphic registers are
2555    invoked to GL or GR.  These designations and invocations can be
2556    done independently.  The most common case is that G0 is invoked to
2557    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2558    these invocations and designations are omitted in encoded text.
2559    In a 7-bit environment, only GL can be used.
2560
2561    When a graphic character set of CHARS94 is invoked to GL, codes
2562    0x20 and 0x7F of the GL area work as control characters SPACE and
2563    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2564    be used.
2565
2566    There are two ways of invocation: locking-shift and single-shift.
2567    With locking-shift, the invocation lasts until the next different
2568    invocation, whereas with single-shift, the invocation affects the
2569    following character only and doesn't affect the locking-shift
2570    state.  Invocations are done by the following control characters or
2571    escape sequences:
2572
2573    ----------------------------------------------------------------------
2574    abbrev  function                  cntrl escape seq   description
2575    ----------------------------------------------------------------------
2576    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2577    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2578    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2579    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2580    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2581    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2582    LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
2583    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2584    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2585    ----------------------------------------------------------------------
2586    (*) These are not used by any known coding system.
2587
2588    Control characters for these functions are defined by macros
2589    ISO_CODE_XXX in `coding.h'.
2590
2591    Designations are done by the following escape sequences:
2592    ----------------------------------------------------------------------
2593    escape sequence      description
2594    ----------------------------------------------------------------------
2595    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2596    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2597    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2598    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2599    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2600    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2601    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2602    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2603    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2604    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2605    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2606    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2607    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2608    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2609    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2610    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2611    ----------------------------------------------------------------------
2612
2613    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2614    of dimension 1, chars 94, and final character <F>, etc...
2615
2616    Note (*): Although these designations are not allowed in ISO2022,
2617    Emacs accepts them on decoding, and produces them on encoding
2618    CHARS96 character sets in a coding system which is characterized as
2619    7-bit environment, non-locking-shift, and non-single-shift.
2620
2621    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2622    '(' must be omitted.  We refer to this as "short-form" hereafter.
2623
2624    Now you may notice that there are a lot of ways of encoding the
2625    same multilingual text in ISO2022.  Actually, there exist many
2626    coding systems such as Compound Text (used in X11's inter client
2627    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2628    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2629    localized platforms), and all of these are variants of ISO2022.
2630
2631    In addition to the above, Emacs handles two more kinds of escape
2632    sequences: ISO6429's direction specification and Emacs' private
2633    sequence for specifying character composition.
2634
2635    ISO6429's direction specification takes the following form:
2636         o CSI ']'      -- end of the current direction
2637         o CSI '0' ']'  -- end of the current direction
2638         o CSI '1' ']'  -- start of left-to-right text
2639         o CSI '2' ']'  -- start of right-to-left text
2640    The control character CSI (0x9B: control sequence introducer) is
2641    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2642
2643    Character composition specification takes the following form:
2644         o ESC '0' -- start relative composition
2645         o ESC '1' -- end composition
2646         o ESC '2' -- start rule-base composition (*)
2647         o ESC '3' -- start relative composition with alternate chars  (**)
2648         o ESC '4' -- start rule-base composition with alternate chars  (**)
2649   Since these are not standard escape sequences of any ISO standard,
2650   the use of them with these meanings is restricted to Emacs only.
2651
2652   (*) This form is used only in Emacs 20.7 and older versions,
2653   but newer versions can safely decode it.
2654   (**) This form is used only in Emacs 21.1 and newer versions,
2655   and older versions can't decode it.
2656
2657   Here's a list of example usages of these composition escape
2658   sequences (categorized by `enum composition_method').
2659
2660   COMPOSITION_RELATIVE:
2661         ESC 0 CHAR [ CHAR ] ESC 1
2662   COMPOSITION_WITH_RULE:
2663         ESC 2 CHAR [ RULE CHAR ] ESC 1
2664   COMPOSITION_WITH_ALTCHARS:
2665         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2666   COMPOSITION_WITH_RULE_ALTCHARS:
2667         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2668
/* Table with one `enum iso_code_class_type' entry for each of the 256
   possible byte values.  It has external linkage and is not
   initialized here.  NOTE(review): presumably it maps a byte to its
   ISO-2022 code class and is filled in by initialization code
   elsewhere -- confirm against the code that populates it.  */
2669 enum iso_code_class_type iso_code_class[256];
2670
/* Return nonzero if the charset whose id is ID is safe for CODING,
   i.e. ID is within 0..CODING->max_charset_id and the entry for ID
   in CODING->safe_charsets is not a "no register" mark.

   The table is created by setup_iso_safe_charsets filled with the
   byte 255, and entries of usable charsets are overwritten with a
   small register number.  The entry is tested through its high bit
   rather than with `>= 0', so the result does not depend on whether
   plain `char' is signed on the platform.  A negative ID is never
   safe (it used to index before the start of the table).

   ID is evaluated more than once and must not have side effects.  */

#define SAFE_CHARSET_P(coding, id)                      \
  ((id) >= 0                                            \
   && (id) <= (coding)->max_charset_id                  \
   && ((coding)->safe_charsets[id] & 0x80) == 0)
2674
2675
/* Nonzero if the coding system registered for CATEGORY in
   `coding_categories' has a non-negative CODING_ISO_INITIAL value
   for register 1.  NOTE(review): presumably this means a charset is
   initially designated to G1, the register that shift-out invokes --
   confirm against the definition of CODING_ISO_INITIAL.  */
#define SHIFT_OUT_OK(category)  \
  (0 <= CODING_ISO_INITIAL (&coding_categories[category], 1))
2678
2679 static void
2680 setup_iso_safe_charsets (attrs)
2681      Lisp_Object attrs;
2682 {
2683   Lisp_Object charset_list, safe_charsets;
2684   Lisp_Object request;
2685   Lisp_Object reg_usage;
2686   Lisp_Object tail;
2687   int reg94, reg96;
2688   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2689   int max_charset_id;
2690
2691   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2692   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2693       && ! EQ (charset_list, Viso_2022_charset_list))
2694     {
2695       CODING_ATTR_CHARSET_LIST (attrs)
2696         = charset_list = Viso_2022_charset_list;
2697       ASET (attrs, coding_attr_safe_charsets, Qnil);
2698     }
2699
2700   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2701     return;
2702
2703   max_charset_id = 0;
2704   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2705     {
2706       int id = XINT (XCAR (tail));
2707       if (max_charset_id < id)
2708         max_charset_id = id;
2709     }
2710
2711   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2712                                 make_number (255));
2713   request = AREF (attrs, coding_attr_iso_request);
2714   reg_usage = AREF (attrs, coding_attr_iso_usage);
2715   reg94 = XINT (XCAR (reg_usage));
2716   reg96 = XINT (XCDR (reg_usage));
2717
2718   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2719     {
2720       Lisp_Object id;
2721       Lisp_Object reg;
2722       struct charset *charset;
2723
2724       id = XCAR (tail);
2725       charset = CHARSET_FROM_ID (XINT (id));
2726       reg = Fcdr (Fassq (id, request));
2727       if (! NILP (reg))
2728         SSET (safe_charsets, XINT (id), XINT (reg));
2729       else if (charset->iso_chars_96)
2730         {
2731           if (reg96 < 4)
2732             SSET (safe_charsets, XINT (id), reg96);
2733         }
2734       else
2735         {
2736           if (reg94 < 4)
2737             SSET (safe_charsets, XINT (id), reg94);
2738         }
2739     }
2740   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2741 }
2742
2743
2744 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2745    Check if a text is encoded in one of ISO-2022 based codig systems.
2746    If it is, return 1, else return 0.  */
2747
2748 static int
2749 detect_coding_iso_2022 (coding, detect_info)
2750      struct coding_system *coding;
2751      struct coding_detection_info *detect_info;
2752 {
2753   const unsigned char *src = coding->source, *src_base = src;
2754   const unsigned char *src_end = coding->source + coding->src_bytes;
2755   int multibytep = coding->src_multibyte;
2756   int single_shifting = 0;
2757   int id;
2758   int c, c1;
2759   int consumed_chars = 0;
2760   int i;
2761   int rejected = 0;
2762   int found = 0;
2763   int composition_count = -1;
2764
2765   detect_info->checked |= CATEGORY_MASK_ISO;
2766
2767   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2768     {
2769       struct coding_system *this = &(coding_categories[i]);
2770       Lisp_Object attrs, val;
2771
2772       if (this->id < 0)
2773         continue;
2774       attrs = CODING_ID_ATTRS (this->id);
2775       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2776           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2777         setup_iso_safe_charsets (attrs);
2778       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2779       this->max_charset_id = SCHARS (val) - 1;
2780       this->safe_charsets = (char *) SDATA (val);
2781     }
2782
2783   /* A coding system of this category is always ASCII compatible.  */
2784   src += coding->head_ascii;
2785
2786   while (rejected != CATEGORY_MASK_ISO)
2787     {
2788       src_base = src;
2789       ONE_MORE_BYTE (c);
2790       switch (c)
2791         {
2792         case ISO_CODE_ESC:
2793           if (inhibit_iso_escape_detection)
2794             break;
2795           single_shifting = 0;
2796           ONE_MORE_BYTE (c);
2797           if (c >= '(' && c <= '/')
2798             {
2799               /* Designation sequence for a charset of dimension 1.  */
2800               ONE_MORE_BYTE (c1);
2801               if (c1 < ' ' || c1 >= 0x80
2802                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2803                 /* Invalid designation sequence.  Just ignore.  */
2804                 break;
2805             }
2806           else if (c == '$')
2807             {
2808               /* Designation sequence for a charset of dimension 2.  */
2809               ONE_MORE_BYTE (c);
2810               if (c >= '@' && c <= 'B')
2811                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2812                 id = iso_charset_table[1][0][c];
2813               else if (c >= '(' && c <= '/')
2814                 {
2815                   ONE_MORE_BYTE (c1);
2816                   if (c1 < ' ' || c1 >= 0x80
2817                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2818                     /* Invalid designation sequence.  Just ignore.  */
2819                     break;
2820                 }
2821               else
2822                 /* Invalid designation sequence.  Just ignore it.  */
2823                 break;
2824             }
2825           else if (c == 'N' || c == 'O')
2826             {
2827               /* ESC <Fe> for SS2 or SS3.  */
2828               single_shifting = 1;
2829               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2830               break;
2831             }
2832           else if (c == '1')
2833             {
2834               /* End of composition.  */
2835               if (composition_count < 0
2836                   || composition_count > MAX_COMPOSITION_COMPONENTS)
2837                 /* Invalid */
2838                 break;
2839               composition_count = -1;
2840               found |= CATEGORY_MASK_ISO;
2841             }
2842           else if (c >= '0' && c <= '4')
2843             {
2844               /* ESC <Fp> for start/end composition.  */
2845               composition_count = 0;
2846               break;
2847             }
2848           else
2849             {
2850               /* Invalid escape sequence.  Just ignore it.  */
2851               break;
2852             }
2853
2854           /* We found a valid designation sequence for CHARSET.  */
2855           rejected |= CATEGORY_MASK_ISO_8BIT;
2856           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2857                               id))
2858             found |= CATEGORY_MASK_ISO_7;
2859           else
2860             rejected |= CATEGORY_MASK_ISO_7;
2861           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2862                               id))
2863             found |= CATEGORY_MASK_ISO_7_TIGHT;
2864           else
2865             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2866           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2867                               id))
2868             found |= CATEGORY_MASK_ISO_7_ELSE;
2869           else
2870             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2871           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2872                               id))
2873             found |= CATEGORY_MASK_ISO_8_ELSE;
2874           else
2875             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2876           break;
2877
2878         case ISO_CODE_SO:
2879         case ISO_CODE_SI:
2880           /* Locking shift out/in.  */
2881           if (inhibit_iso_escape_detection)
2882             break;
2883           single_shifting = 0;
2884           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2885           break;
2886
2887         case ISO_CODE_CSI:
2888           /* Control sequence introducer.  */
2889           single_shifting = 0;
2890           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2891           found |= CATEGORY_MASK_ISO_8_ELSE;
2892           goto check_extra_latin;
2893
2894         case ISO_CODE_SS2:
2895         case ISO_CODE_SS3:
2896           /* Single shift.   */
2897           if (inhibit_iso_escape_detection)
2898             break;
2899           single_shifting = 0;
2900           rejected |= CATEGORY_MASK_ISO_7BIT;
2901           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2902               & CODING_ISO_FLAG_SINGLE_SHIFT)
2903             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2904           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2905               & CODING_ISO_FLAG_SINGLE_SHIFT)
2906             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2907           if (single_shifting)
2908             break;
2909           goto check_extra_latin;
2910
2911         default:
2912           if (c < 0)
2913             continue;
2914           if (c < 0x80)
2915             {
2916               if (composition_count >= 0)
2917                 composition_count++;
2918               single_shifting = 0;
2919               break;
2920             }
2921           if (c >= 0xA0)
2922             {
2923               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2924               found |= CATEGORY_MASK_ISO_8_1;
2925               /* Check the length of succeeding codes of the range
2926                  0xA0..0FF.  If the byte length is even, we include
2927                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2928                  only when we are not single shifting.  */
2929               if (! single_shifting
2930                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2931                 {
2932                   int i = 1;
2933                   while (src < src_end)
2934                     {
2935                       ONE_MORE_BYTE (c);
2936                       if (c < 0xA0)
2937                         break;
2938                       i++;
2939                     }
2940
2941                   if (i & 1 && src < src_end)
2942                     {
2943                       rejected |= CATEGORY_MASK_ISO_8_2;
2944                       if (composition_count >= 0)
2945                         composition_count += i;
2946                     }
2947                   else
2948                     {
2949                       found |= CATEGORY_MASK_ISO_8_2;
2950                       if (composition_count >= 0)
2951                         composition_count += i / 2;
2952                     }
2953                 }
2954               break;
2955             }
2956         check_extra_latin:
2957           single_shifting = 0;
2958           if (! VECTORP (Vlatin_extra_code_table)
2959               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2960             {
2961               rejected = CATEGORY_MASK_ISO;
2962               break;
2963             }
2964           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2965               & CODING_ISO_FLAG_LATIN_EXTRA)
2966             found |= CATEGORY_MASK_ISO_8_1;
2967           else
2968             rejected |= CATEGORY_MASK_ISO_8_1;
2969           rejected |= CATEGORY_MASK_ISO_8_2;
2970         }
2971     }
2972   detect_info->rejected |= CATEGORY_MASK_ISO;
2973   return 0;
2974
2975  no_more_source:
2976   detect_info->rejected |= rejected;
2977   detect_info->found |= (found & ~rejected);
2978   return 1;
2979 }
2980
2981
/* Record in CODING that the charset selected by DIM, CHARS_96 and
   the final byte FINAL is designated to the graphic register REG.

   The designation is refused when FINAL is outside '0'..127, when
   ISO_CHARSET_TABLE has no charset for the combination, or when that
   charset is not safe for CODING.  REG is then marked with -2 and
   CHARS_96 is set to -1, which tells the caller that the escape
   sequence should be kept.

   Otherwise the charset id is stored for REG, after replacing
   JISX0201-Roman by ASCII if CODING has CODING_ISO_FLAG_USE_ROMAN,
   or JISX0208.1978 by JISX0208 if it has CODING_ISO_FLAG_USE_OLDJIS.

   CHARS_96 must be a modifiable lvalue, and a variable named
   `coding' must be visible where the macro is used.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id, prev;                                                       \
                                                                        \
    if (final >= '0' && final < 128                                     \
        && ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) >= 0)       \
        && SAFE_CHARSET_P (coding, id))                                 \
      {                                                                 \
        prev = CODING_ISO_DESIGNATION (coding, reg);                    \
        if (id == charset_jisx0201_roman)                               \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)  \
              id = charset_ascii;                                       \
          }                                                             \
        else if (id == charset_jisx0208_1978)                           \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
              id = charset_jisx0208;                                    \
          }                                                             \
        CODING_ISO_DESIGNATION (coding, reg) = id;                      \
        /* A designation of ASCII to a register that holds the mark    \
           of an earlier refused designation must be kept in the       \
           text as well.  */                                            \
        if (prev == -2 && id == charset_ascii)                          \
          chars_96 = -1;                                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* Unknown or unsafe charset: mark REG as invalid.  */          \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
      }                                                                 \
  } while (0)
3014
3015
/* If a composition is being decoded, close it: copy the characters
   collected in `components' to CHARBUF, advance `char_offset', and
   set `composition_state' to COMPOSING_NO.  Do nothing if no
   composition is in progress.

   For COMPOSITION_RELATIVE and COMPOSITION_WITH_ALTCHARS all
   COMPONENT_IDX entries are copied.  For the other methods only the
   entries at even indices are copied (NOTE(review): presumably the
   odd entries are composition rules -- confirm against the code that
   fills `components').

   If CHARBUF has no room for COMPONENT_IDX more elements, control
   jumps to the label `no_more_source' with the state unchanged.

   The macro relies on these names of the function it is used in:
   composition_state, method, components, component_idx, charbuf,
   charbuf_end, char_offset, and the label no_more_source.  */
#define MAYBE_FINISH_COMPOSITION()                              \
  do {                                                          \
    int i;                                                      \
    if (composition_state != COMPOSING_NO)                      \
      {                                                         \
        /* Make sure there is enough room for producing the     \
           characters stored in the table `components'.  */     \
        if (charbuf + component_idx > charbuf_end)              \
          goto no_more_source;                                  \
        composition_state = COMPOSING_NO;                       \
        if (method != COMPOSITION_RELATIVE                      \
            && method != COMPOSITION_WITH_ALTCHARS)             \
          {                                                     \
            for (i = 0; i < component_idx; i += 2)              \
              *charbuf++ = components[i];                       \
            char_offset += (component_idx / 2) + 1;             \
          }                                                     \
        else                                                    \
          {                                                     \
            for (i = 0; i < component_idx; i++)                 \
              *charbuf++ = components[i];                       \
            char_offset += component_idx;                       \
          }                                                     \
      }                                                         \
  } while (0)
3040
3041
3042 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3043    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3044    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3045    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3046    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
        C1 is the byte that followed ESC.  When it is '0' and
        composition_state is COMPOSING_COMPONENT_RULE, the macro only
        records the end of the components (component_len) and switches to
        COMPOSING_CHAR.  Otherwise it flushes any pending composition with
        MAYBE_FINISH_COMPOSITION, jumps to no_more_source when fewer than
        MAX_COMPOSITION_COMPONENTS slots remain in charbuf, and scans the
        rest of the source for the closing ESC 1.  If that is missing it
        jumps to invalid_code on the last block, or else records
        CODING_RESULT_INSUFFICIENT_SRC and jumps to no_more_source.
        The "missing" test is P >= SRC_END - 1 rather than ==, because SRC
        already equals SRC_END when the start sequence is the last thing in
        the source; the scan loop then never runs and P stays at SRC_END.
        The macro relies on these names of the expanding function: src,
        src_end, charbuf, charbuf_end, coding, method, composition_state,
        component_idx, component_len, and the labels named above.
3047   */
3048
3049 #define DECODE_COMPOSITION_START(c1)                                    \
3050   do {                                                                  \
3051     if (c1 == '0'                                                       \
3052         && composition_state == COMPOSING_COMPONENT_RULE)               \
3053       {                                                                 \
3054         component_len = component_idx;                                  \
3055         composition_state = COMPOSING_CHAR;                             \
3056       }                                                                 \
3057     else                                                                \
3058       {                                                                 \
3059         const unsigned char *p;                                         \
3060                                                                         \
3061         MAYBE_FINISH_COMPOSITION ();                                    \
3062         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
3063           goto no_more_source;                                          \
3064         for (p = src; p < src_end - 1; p++)                             \
3065           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
3066             break;                                                      \
3067         if (p >= src_end - 1)                                           \
3068           {                                                             \
3069             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
3070               goto invalid_code;                                        \
3071             /* The current composition doesn't end in the current       \
3072                source.  */                                              \
3073             record_conversion_result                                    \
3074               (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
3075             goto no_more_source;                                        \
3076           }                                                             \
3077                                                                         \
3078         /* This is surely the start of a composition.  */               \
3079         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
3080                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
3081                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
3082                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
3083         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
3084                              : COMPOSING_COMPONENT_CHAR);               \
3085         component_idx = component_len = 0;                              \
3086       }                                                                 \
3087   } while (0)
3088
3089
3090 /* Handle composition end sequence ESC 1.
        NCHARS, the number of composed characters, is the number of
        entries stored after the first component_len ones when
        component_len > 0, component_idx for a relative composition, and
        (component_idx + 1) / 2 otherwise.  ADD_COMPOSITION_DATA is
        invoked first.  For a non-relative method the components follow
        (the first component_len entries, or all of them when
        component_len is 0), and the int at the saved start position of
        charbuf is then overwritten with the negated number of ints
        written since that position.  After that the characters are
        appended: every other entry for COMPOSITION_WITH_RULE, the
        entries from component_len on otherwise, with char_offset
        advanced once per character.  Finally coding->annotated is set
        to 1 and composition_state goes back to COMPOSING_NO.
        The macro relies on these names of the expanding function:
        charbuf, components, component_idx, component_len, method,
        char_offset, composition_state and coding.  */
3091
3092 #define DECODE_COMPOSITION_END()                                        \
3093   do {                                                                  \
3094     int nchars = (component_len > 0 ? component_idx - component_len     \
3095                   : method == COMPOSITION_RELATIVE ? component_idx      \
3096                   : (component_idx + 1) / 2);                           \
3097     int i;                                                              \
3098     int *saved_charbuf = charbuf;                                       \
3099                                                                         \
3100     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
3101     if (method != COMPOSITION_RELATIVE)                                 \
3102       {                                                                 \
3103         if (component_len == 0)                                         \
3104           for (i = 0; i < component_idx; i++)                           \
3105             *charbuf++ = components[i];                                 \
3106         else                                                            \
3107           for (i = 0; i < component_len; i++)                           \
3108             *charbuf++ = components[i];                                 \
3109         *saved_charbuf = saved_charbuf - charbuf;                       \
3110       }                                                                 \
3111     if (method == COMPOSITION_WITH_RULE)                                \
3112       for (i = 0; i < component_idx; i += 2, char_offset++)             \
3113         *charbuf++ = components[i];                                     \
3114     else                                                                \
3115       for (i = component_len; i < component_idx; i++, char_offset++)    \
3116         *charbuf++ = components[i];                                     \
3117     coding->annotated = 1;                                              \
3118     composition_state = COMPOSING_NO;                                   \
3119   } while (0)
3120
3121
3122 /* Decode a composition rule from the byte C1 (and maybe one more byte
3123    from SRC) and leave the encoded composition rule in C1 itself, so
3124    C1 must be an lvalue.  32 is subtracted from C1 first.
        A result below 81 is the old one-byte format: the quotient and
        the remainder of the division by 9 give GREF and NREF, a value
        of 4 being replaced by 10 in both.  A result from 81 to 92 is
        the new two-byte format: the second byte is read with
        ONE_MORE_BYTE into the variable c2 of the expanding function,
        and the rule is built from C1 - 81 and c2 - 32.  Any other
        result makes C1 zero.  */
3125
3126 #define DECODE_COMPOSITION_RULE(c1)                                     \
3127   do {                                                                  \
3128     (c1) -= 32;                                                         \
3129     if (c1 < 81)                /* old format (before ver.21) */        \
3130       {                                                                 \
3131         int gref = (c1) / 9;                                            \
3132         int nref = (c1) % 9;                                            \
3133         if (gref == 4) gref = 10;                                       \
3134         if (nref == 4) nref = 10;                                       \
3135         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
3136       }                                                                 \
3137     else if (c1 < 93)           /* new format (after ver.21) */         \
3138       {                                                                 \
3139         ONE_MORE_BYTE (c2);                                             \
3140         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
3141       }                                                                 \
3142     else                                                                \
3143       c1 = 0;                                                           \
3144   } while (0)
3145
3146
3147 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Decode the bytes between CODING->source + CODING->consumed and
        CODING->source + CODING->src_bytes as ISO 2022, appending the
        resulting characters and annotations to CODING->charbuf from
        index CODING->charbuf_used on.  Escape sequences for designation,
        invocation, direction and composition change the decoder state
        instead of producing a character.  A sequence that cannot be
        decoded is rewound to its first byte; that byte alone is stored
        (through BYTE8_TO_CHAR when it is not ASCII) and CODING->errors
        is incremented.  On return CODING->consumed,
        CODING->consumed_char and CODING->charbuf_used are updated up to
        the start of the sequence that was being handled when the loop
        stopped.  */
3148
3149 static void
3150 decode_coding_iso_2022 (coding)
3151      struct coding_system *coding;
3152 {
3153   const unsigned char *src = coding->source + coding->consumed;
3154   const unsigned char *src_end = coding->source + coding->src_bytes;
3155   const unsigned char *src_base;
3156   int *charbuf = coding->charbuf + coding->charbuf_used;
3157   int *charbuf_end
3158     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
3159   int consumed_chars = 0, consumed_chars_base;
3160   int multibytep = coding->src_multibyte;
3161   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3162   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3163   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3164   int charset_id_2, charset_id_3;
3165   struct charset *charset;
3166   int c;
3167   /* For handling composition sequence.  */
3168 #define COMPOSING_NO                    0
3169 #define COMPOSING_CHAR                  1
3170 #define COMPOSING_RULE                  2
3171 #define COMPOSING_COMPONENT_CHAR        3
3172 #define COMPOSING_COMPONENT_RULE        4
3173
3174   int composition_state = COMPOSING_NO;
3175   enum composition_method method;
3176   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3177   int component_idx;
3178   int component_len;
3179   Lisp_Object attrs, charset_list;
3180   int char_offset = coding->produced_char;
3181   int last_offset = char_offset;
3182   int last_id = charset_ascii;
3183   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3184   int byte_after_cr = -1;
3185
3186   CODING_GET_INFO (coding, attrs, charset_list);
3187   setup_iso_safe_charsets (attrs);
3188   /* Charset list may have been changed.  */
3189   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3190   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3191
3192   while (1)
3193     {
3194       int c1, c2;
3195
3196       src_base = src;
3197       consumed_chars_base = consumed_chars;
3198
3199       if (charbuf >= charbuf_end)
3200         break;
3201
3202       if (byte_after_cr >= 0)
3203         c1 = byte_after_cr, byte_after_cr = -1;
3204       else
3205         ONE_MORE_BYTE (c1);
3206       if (c1 < 0)
3207         goto invalid_code;
3208
3209       /* We produce at most one character.  */
3210       switch (iso_code_class [c1])
3211         {
3212         case ISO_0x20_or_0x7F:
3213           if (composition_state != COMPOSING_NO)
3214             {
3215               if (composition_state == COMPOSING_RULE
3216                   || composition_state == COMPOSING_COMPONENT_RULE)
3217                 {
3218                   if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3219                     {
3220                       DECODE_COMPOSITION_RULE (c1);
3221                       components[component_idx++] = c1;
3222                       composition_state--;
3223                       continue;
3224                     }
3225                   /* Too long composition.  */
3226                   MAYBE_FINISH_COMPOSITION ();
3227                 }
3228             }
3229           if (charset_id_0 < 0
3230               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3231             /* This is SPACE or DEL.  */
3232             charset = CHARSET_FROM_ID (charset_ascii);
3233           else
3234             charset = CHARSET_FROM_ID (charset_id_0);
3235           break;
3236
3237         case ISO_graphic_plane_0:
3238           if (composition_state != COMPOSING_NO)
3239             {
3240               if (composition_state == COMPOSING_RULE
3241                   || composition_state == COMPOSING_COMPONENT_RULE)
3242                 {
3243                   if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3244                     {
3245                       DECODE_COMPOSITION_RULE (c1);
3246                       components[component_idx++] = c1;
3247                       composition_state--;
3248                       continue;
3249                     }
3250                   MAYBE_FINISH_COMPOSITION ();
3251                 }
3252             }
3253           if (charset_id_0 < 0)
3254             charset = CHARSET_FROM_ID (charset_ascii);
3255           else
3256             charset = CHARSET_FROM_ID (charset_id_0);
3257           break;
3258
3259         case ISO_0xA0_or_0xFF:
3260           if (charset_id_1 < 0
3261               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3262               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3263             goto invalid_code;
3264           /* This is a graphic character, we fall down ... */
3265
3266         case ISO_graphic_plane_1:
3267           if (charset_id_1 < 0)
3268             goto invalid_code;
3269           charset = CHARSET_FROM_ID (charset_id_1);
3270           break;
3271
3272         case ISO_control_0:
3273           if (eol_crlf && c1 == '\r')
3274             ONE_MORE_BYTE (byte_after_cr);
3275           MAYBE_FINISH_COMPOSITION ();
3276           charset = CHARSET_FROM_ID (charset_ascii);
3277           break;
3278
3279         case ISO_control_1:
3280           MAYBE_FINISH_COMPOSITION ();
3281           goto invalid_code;
3282
3283         case ISO_shift_out:
3284           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3285               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3286             goto invalid_code;
3287           CODING_ISO_INVOCATION (coding, 0) = 1;
3288           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3289           continue;
3290
3291         case ISO_shift_in:
3292           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3293             goto invalid_code;
3294           CODING_ISO_INVOCATION (coding, 0) = 0;
3295           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3296           continue;
3297
3298         case ISO_single_shift_2_7:
3299         case ISO_single_shift_2:
3300           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3301             goto invalid_code;
3302           /* SS2 is handled as an escape sequence of ESC 'N' */
3303           c1 = 'N';
3304           goto label_escape_sequence;
3305
3306         case ISO_single_shift_3:
3307           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3308             goto invalid_code;
3309           /* SS3 is handled as an escape sequence of ESC 'O' */
3310           c1 = 'O';
3311           goto label_escape_sequence;
3312
3313         case ISO_control_sequence_introducer:
3314           /* CSI is handled as an escape sequence of ESC '[' ...  */
3315           c1 = '[';
3316           goto label_escape_sequence;
3317
3318         case ISO_escape:
3319           ONE_MORE_BYTE (c1);
3320         label_escape_sequence:
3321           /* Escape sequences handled here are invocation,
3322              designation, direction specification, and character
3323              composition specification.  */
3324           switch (c1)
3325             {
3326             case '&':           /* revision of following character set */
3327               ONE_MORE_BYTE (c1);
3328               if (!(c1 >= '@' && c1 <= '~'))
3329                 goto invalid_code;
3330               ONE_MORE_BYTE (c1);
3331               if (c1 != ISO_CODE_ESC)
3332                 goto invalid_code;
3333               ONE_MORE_BYTE (c1);
3334               goto label_escape_sequence;
3335
3336             case '$':           /* designation of 2-byte character set */
3337               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3338                 goto invalid_code;
3339               {
3340                 int reg, chars96;
3341
3342                 ONE_MORE_BYTE (c1);
3343                 if (c1 >= '@' && c1 <= 'B')
3344                   {     /* designation of JISX0208.1978, GB2312.1980,
3345                            or JISX0208.1980 */
3346                     reg = 0, chars96 = 0;
3347                   }
3348                 else if (c1 >= 0x28 && c1 <= 0x2B)
3349                   { /* designation of DIMENSION2_CHARS94 character set */
3350                     reg = c1 - 0x28, chars96 = 0;
3351                     ONE_MORE_BYTE (c1);
3352                   }
3353                 else if (c1 >= 0x2C && c1 <= 0x2F)
3354                   { /* designation of DIMENSION2_CHARS96 character set */
3355                     reg = c1 - 0x2C, chars96 = 1;
3356                     ONE_MORE_BYTE (c1);
3357                   }
3358                 else
3359                   goto invalid_code;
3360                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3361                 /* We must update these variables now.  */
3362                 if (reg == 0)
3363                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3364                 else if (reg == 1)
3365                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3366                 if (chars96 < 0)
3367                   goto invalid_code;
3368               }
3369               continue;
3370
3371             case 'n':           /* invocation of locking-shift-2 */
3372               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3373                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3374                 goto invalid_code;
3375               CODING_ISO_INVOCATION (coding, 0) = 2;
3376               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3377               continue;
3378
3379             case 'o':           /* invocation of locking-shift-3 */
3380               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3381                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3382                 goto invalid_code;
3383               CODING_ISO_INVOCATION (coding, 0) = 3;
3384               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3385               continue;
3386
3387             case 'N':           /* invocation of single-shift-2 */
3388               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3389                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3390                 goto invalid_code;
3391               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3392               if (charset_id_2 < 0)
3393                 charset = CHARSET_FROM_ID (charset_ascii);
3394               else
3395                 charset = CHARSET_FROM_ID (charset_id_2);
3396               ONE_MORE_BYTE (c1);
3397               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3398                 goto invalid_code;
3399               break;
3400
3401             case 'O':           /* invocation of single-shift-3 */
3402               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3403                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3404                 goto invalid_code;
3405               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3406               if (charset_id_3 < 0)
3407                 charset = CHARSET_FROM_ID (charset_ascii);
3408               else
3409                 charset = CHARSET_FROM_ID (charset_id_3);
3410               ONE_MORE_BYTE (c1);
3411               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3412                 goto invalid_code;
3413               break;
3414
3415             case '0': case '2': case '3': case '4': /* start composition */
3416               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3417                 goto invalid_code;
3418               DECODE_COMPOSITION_START (c1);
3419               continue;
3420
3421             case '1':           /* end composition */
3422               if (composition_state == COMPOSING_NO)
3423                 goto invalid_code;
3424               DECODE_COMPOSITION_END ();
3425               continue;
3426
3427             case '[':           /* specification of direction */
                   /* `!' binds tighter than `&', so the flag test must be
                      parenthesized as a whole.  */
3428               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3429                 goto invalid_code;
3430               /* For the moment, nested direction is not supported.
3431                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3432                  left-to-right, and nonzero means right-to-left.  */
3433               ONE_MORE_BYTE (c1);
3434               switch (c1)
3435                 {
3436                 case ']':       /* end of the current direction */
3437                   coding->mode &= ~CODING_MODE_DIRECTION;
                       /* NOTE(review): there is no `break' here, so control
                          falls into the next case, which reads one more
                          byte and requires it to be `]' -- confirm that
                          this is intended.  */
3438
3439                 case '0':       /* end of the current direction */
3440                 case '1':       /* start of left-to-right direction */
3441                   ONE_MORE_BYTE (c1);
3442                   if (c1 == ']')
3443                     coding->mode &= ~CODING_MODE_DIRECTION;
3444                   else
3445                     goto invalid_code;
3446                   break;
3447
3448                 case '2':       /* start of right-to-left direction */
3449                   ONE_MORE_BYTE (c1);
3450                   if (c1 == ']')
3451                     coding->mode |= CODING_MODE_DIRECTION;
3452                   else
3453                     goto invalid_code;
3454                   break;
3455
3456                 default:
3457                   goto invalid_code;
3458                 }
3459               continue;
3460
3461             case '%':
3462               ONE_MORE_BYTE (c1);
3463               if (c1 == '/')
3464                 {
3465                   /* CTEXT extended segment:
3466                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3467                      We keep these bytes as is for the moment.
3468                      They may be decoded by post-read-conversion.  */
3469                   int dim, M, L;
3470                   int size;
3471
3472                   ONE_MORE_BYTE (dim);
3473                   ONE_MORE_BYTE (M);
3474                   ONE_MORE_BYTE (L);
3475                   size = ((M - 128) * 128) + (L - 128);
3476                   if (charbuf + 8 + size > charbuf_end)
3477                     goto break_loop;
3478                   *charbuf++ = ISO_CODE_ESC;
3479                   *charbuf++ = '%';
3480                   *charbuf++ = '/';
3481                   *charbuf++ = dim;
3482                   *charbuf++ = BYTE8_TO_CHAR (M);
3483                   *charbuf++ = BYTE8_TO_CHAR (L);
3484                   while (size-- > 0)
3485                     {
3486                       ONE_MORE_BYTE (c1);
3487                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3488                     }
3489                 }
3490               else if (c1 == 'G')
3491                 {
3492                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3493                      ESC % G --UTF-8-BYTES-- ESC % @
3494                      We keep these bytes as is for the moment.
3495                      They may be decoded by post-read-conversion.  */
3496                   int *p = charbuf;
3497
3498                   if (p + 6 > charbuf_end)
3499                     goto break_loop;
3500                   *p++ = ISO_CODE_ESC;
3501                   *p++ = '%';
3502                   *p++ = 'G';
3503                   while (p < charbuf_end)
3504                     {
3505                       ONE_MORE_BYTE (c1);
3506                       if (c1 == ISO_CODE_ESC
3507                           && src + 1 < src_end
3508                           && src[0] == '%'
3509                           && src[1] == '@')
3510                         {
3511                           src += 2;
3512                           break;
3513                         }
3514                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3515                     }
3516                   if (p + 3 > charbuf_end)
3517                     goto break_loop;
3518                   *p++ = ISO_CODE_ESC;
3519                   *p++ = '%';
3520                   *p++ = '@';
3521                   charbuf = p;
3522                 }
3523               else
3524                 goto invalid_code;
3525               continue;
3526               break;
3527
3528             default:
3529               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3530                 goto invalid_code;
3531               {
3532                 int reg, chars96;
3533
3534                 if (c1 >= 0x28 && c1 <= 0x2B)
3535                   { /* designation of DIMENSION1_CHARS94 character set */
3536                     reg = c1 - 0x28, chars96 = 0;
3537                     ONE_MORE_BYTE (c1);
3538                   }
3539                 else if (c1 >= 0x2C && c1 <= 0x2F)
3540                   { /* designation of DIMENSION1_CHARS96 character set */
3541                     reg = c1 - 0x2C, chars96 = 1;
3542                     ONE_MORE_BYTE (c1);
3543                   }
3544                 else
3545                   goto invalid_code;
3546                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3547                 /* We must update these variables now.  */
3548                 if (reg == 0)
3549                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3550                 else if (reg == 1)
3551                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3552                 if (chars96 < 0)
3553                   goto invalid_code;
3554               }
3555               continue;
3556             }
3557         }
3558
3559       if (charset->id != charset_ascii
3560           && last_id != charset->id)
3561         {
3562           if (last_id != charset_ascii)
3563             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3564           last_id = charset->id;
3565           last_offset = char_offset;
3566         }
3567
3568       /* Now we know CHARSET and 1st position code C1 of a character.
3569          Produce a decoded character while getting 2nd position code
3570          C2 if necessary.  */
3571       c1 &= 0x7F;
3572       if (CHARSET_DIMENSION (charset) > 1)
3573         {
3574           ONE_MORE_BYTE (c2);
3575           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3576             /* C2 is not in a valid range.  */
3577             goto invalid_code;
3578           c1 = (c1 << 8) | (c2 & 0x7F);
3579           if (CHARSET_DIMENSION (charset) > 2)
3580             {
3581               ONE_MORE_BYTE (c2);
3582               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3583                 /* C2 is not in a valid range.  */
3584                 goto invalid_code;
3585               c1 = (c1 << 8) | (c2 & 0x7F);
3586             }
3587         }
3588
3589       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3590       if (c < 0)
3591         {
3592           MAYBE_FINISH_COMPOSITION ();
3593           for (; src_base < src; src_base++, char_offset++)
3594             {
3595               if (ASCII_BYTE_P (*src_base))
3596                 *charbuf++ = *src_base;
3597               else
3598                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3599             }
3600         }
3601       else if (composition_state == COMPOSING_NO)
3602         {
3603           *charbuf++ = c;
3604           char_offset++;
3605         }
3606       else
3607         {
3608           if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3609             {
3610               components[component_idx++] = c;
3611               if (method == COMPOSITION_WITH_RULE
3612                   || (method == COMPOSITION_WITH_RULE_ALTCHARS
3613                       && composition_state == COMPOSING_COMPONENT_CHAR))
3614                 composition_state++;
3615             }
3616           else
3617             {
3618               MAYBE_FINISH_COMPOSITION ();
3619               *charbuf++ = c;
3620               char_offset++;
3621             }
3622         }
3623       continue;
3624
3625     invalid_code:
3626       MAYBE_FINISH_COMPOSITION ();
3627       src = src_base;
3628       consumed_chars = consumed_chars_base;
3629       ONE_MORE_BYTE (c);
3630       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3631       char_offset++;
3632       coding->errors++;
3633       continue;
3634
3635     break_loop:
3636       break;
3637     }
3638
3639  no_more_source:
3640   if (last_id != charset_ascii)
3641     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3642   coding->consumed_char += consumed_chars_base;
3643   coding->consumed = src_base - coding->source;
3644   coding->charbuf_used = charbuf - coding->charbuf;
3645 }
3646
3647
3648 /* ISO2022 encoding stuff.  */
3649
3650 /*
3651    It is not enough to say just "ISO2022" on encoding, we have to
3652    specify more details.  In Emacs, each coding system of ISO2022
3653    variant has the following specifications:
3654         1. Initial designation to G0 thru G3.
3655         2. Allows short-form designation?
3656         3. ASCII should be designated to G0 before control characters?
3657         4. ASCII should be designated to G0 at end of line?
3658         5. 7-bit environment or 8-bit environment?
3659         6. Use locking-shift?
3660         7. Use Single-shift?
3661    And the following two are only for Japanese:
3662         8. Use ASCII in place of JIS0201-1976-Roman?
3663         9. Use JISX0208-1983 in place of JISX0208-1978?
3664    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3665    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3666    details.
3667 */
3668
3669 /* Produce codes (escape sequence) for designating CHARSET to graphic
3670    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3671    '@', 'A', or 'B' and the coding system CODING allows, produce
3672    designation sequence of short-form.
        The bytes are emitted in this order:
          - ESC '&' ('@' + revision), only when CODING_ISO_FLAG_REVISION is
            set and CHARSET_ISO_REVISION (CHARSET) is not negative;
          - ESC;
          - for a charset of dimension 1, the intermediate byte taken from
            "()*+" (94-char set) or ",-./" (96-char set) at index REG;
          - for any other dimension, '$' and then the intermediate byte
            chosen the same way, except that it is left out for a 94-char
            set when CODING_ISO_FLAG_LONG_FORM is not set, REG is 0 and
            the final char is between '@' and 'B';
          - the final char of CHARSET.
        Afterwards CHARSET's id is recorded as the designation of REG in
        CODING.  */
3673
3674 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3675   do {                                                                  \
3676     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3677     char *intermediate_char_94 = "()*+";                                \
3678     char *intermediate_char_96 = ",-./";                                \
3679     int revision = -1;                                                  \
3680     int c;                                                              \
3681                                                                         \
3682     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3683       revision = CHARSET_ISO_REVISION (charset);                        \
3684                                                                         \
3685     if (revision >= 0)                                                  \
3686       {                                                                 \
3687         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3688         EMIT_ONE_BYTE ('@' + revision);                                 \
3689       }                                                                 \
3690     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3691     if (CHARSET_DIMENSION (charset) == 1)                               \
3692       {                                                                 \
3693         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3694           c = intermediate_char_94[reg];                                \
3695         else                                                            \
3696           c = intermediate_char_96[reg];                                \
3697         EMIT_ONE_ASCII_BYTE (c);                                        \
3698       }                                                                 \
3699     else                                                                \
3700       {                                                                 \
3701         EMIT_ONE_ASCII_BYTE ('$');                                      \
3702         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3703           {                                                             \
3704             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3705                 || reg != 0                                             \
3706                 || final_char < '@' || final_char > 'B')                \
3707               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3708           }                                                             \
3709         else                                                            \
3710           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3711       }                                                                 \
3712     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3713                                                                         \
3714     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3715   } while (0)
3716
3717
3718 /* The following two macros produce codes (control character or escape
3719    sequence) for ISO2022 single-shift functions (single-shift-2 and
3720    single-shift-3).
        When CODING_ISO_FLAG_SEVEN_BITS is set in the ISO flags of
        `coding', the two-byte escape sequence is emitted: ESC 'N' for
        single-shift-2 and ESC 'O' for single-shift-3.  Otherwise the
        single byte ISO_CODE_SS2 or ISO_CODE_SS3 is emitted.  In both
        cases CODING_ISO_SINGLE_SHIFTING (coding) is set to 1.
        Neither macro takes an argument; `coding' is the variable of that
        name at the place of expansion.  */
3721
3722 #define ENCODE_SINGLE_SHIFT_2                                           \
3723   do {                                                                  \
3724     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3725       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3726     else                                                                \
3727       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3728     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3729   } while (0)
3730
3731
3732 #define ENCODE_SINGLE_SHIFT_3                                           \
3733   do {                                                                  \
3734     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3735       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3736     else                                                                \
3737       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3738     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3739   } while (0)
3740
3741
3742 /* The following four macros produce codes (control character or
3743    escape sequence) for ISO2022 locking-shift functions (shift-in,
3744    shift-out, locking-shift-2, and locking-shift-3).  */
3745
3746 #define ENCODE_SHIFT_IN                                 \
3747   do {                                                  \
         /* Emit SI and record that graphic register 0   \
            is now invoked to graphic plane 0.  */         \
3748     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3749     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3750   } while (0)
3751
3752
3753 #define ENCODE_SHIFT_OUT                                \
3754   do {                                                  \
         /* Emit SO and record that graphic register 1   \
            is now invoked to graphic plane 0.  */         \
3755     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3756     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3757   } while (0)
3758
3759
3760 #define ENCODE_LOCKING_SHIFT_2                          \
3761   do {                                                  \
         /* Emit ESC n and record that graphic register 2 \
            is now invoked to graphic plane 0.  */         \
3762     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3763     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3764   } while (0)
3765
3766
3767 #define ENCODE_LOCKING_SHIFT_3                          \
3768   do {                                                  \
         /* Locking-shift-3 is ESC o.  ESC n is           \
            locking-shift-2 and would make a reader      \
            invoke graphic register 2 instead of 3.  */    \
3769     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
         /* Graphic register 3 is now invoked to          \
            graphic plane 0.  */                           \
3770     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3771   } while (0)
3772
3773
3774 /* Produce codes for a DIMENSION1 character whose character set is
3775    CHARSET and whose position-code is C1.  Designation and invocation
3776    sequences are also produced in advance if necessary.
        CHARSET must be an lvalue: it is reassigned when ASCII is
        replaced by JISX0201-Roman under CODING_ISO_FLAG_USE_ROMAN.  C1
        may be any integer expression; it is parenthesized at every use.
        The body repeats until one of the emitting branches is reached,
        and refers to the variables `coding', `dst' and `produced_chars'
        of the expansion site.  */
3777
3778 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3779   do {                                                                  \
3780     int id = CHARSET_ID (charset);                                      \
3781                                                                         \
3782     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3783         && id == charset_ascii)                                         \
3784       {                                                                 \
3785         id = charset_jisx0201_roman;                                    \
3786         charset = CHARSET_FROM_ID (id);                                 \
3787       }                                                                 \
3788                                                                         \
3789     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3790       {                                                                 \
3791         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3792           EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
3793         else                                                            \
3794           EMIT_ONE_BYTE ((c1) | 0x80);                                  \
3795         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3796         break;                                                          \
3797       }                                                                 \
3798     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3799       {                                                                 \
3800         EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
3801         break;                                                          \
3802       }                                                                 \
3803     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3804       {                                                                 \
3805         EMIT_ONE_BYTE ((c1) | 0x80);                                    \
3806         break;                                                          \
3807       }                                                                 \
3808     else                                                                \
3809       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3810          must invoke it, or, at first, designate it to some graphic     \
3811          register.  Then repeat the loop to actually produce the        \
3812          character.  */                                                 \
3813       dst = encode_invocation_designation (charset, coding, dst,        \
3814                                            &produced_chars);            \
3815   } while (1)
3816
3817
3818 /* Produce codes for a DIMENSION2 character whose character set is
3819    CHARSET and whose position-codes are C1 and C2.  Designation and
3820    invocation codes are also produced in advance if necessary.
        CHARSET must be an lvalue: it is reassigned when JISX0208 is
        replaced by JISX0208-1978 under CODING_ISO_FLAG_USE_OLDJIS.  The
        body repeats until one of the emitting branches is reached, and
        refers to the variables `coding', `dst' and `produced_chars' of
        the expansion site.  */
3821
3822 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3823   do {                                                                  \
3824     int id = CHARSET_ID (charset);                                      \
3825                                                                         \
3826     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3827         && id == charset_jisx0208)                                      \
3828       {                                                                 \
3829         id = charset_jisx0208_1978;                                     \
3830         charset = CHARSET_FROM_ID (id);                                 \
3831       }                                                                 \
3832                                                                         \
         /* A pending single shift applies to this one character only,    \
            so the flag is cleared once the bytes are emitted.  */        \
3833     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3834       {                                                                 \
3835         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3836           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3837         else                                                            \
3838           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3839         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3840         break;                                                          \
3841       }                                                                 \
         /* Invoked to graphic plane 0: emit with the 8th bit clear.  */   \
3842     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3843       {                                                                 \
3844         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3845         break;                                                          \
3846       }                                                                 \
         /* Invoked to graphic plane 1: emit with the 8th bit set.  */     \
3847     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3848       {                                                                 \
3849         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3850         break;                                                          \
3851       }                                                                 \
3852     else                                                                \
3853       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3854          must invoke it, or, at first, designate it to some graphic     \
3855          register.  Then repeat the loop to actually produce the        \
3856          character.  */                                                 \
3857       dst = encode_invocation_designation (charset, coding, dst,        \
3858                                            &produced_chars);            \
3859   } while (1)
3860
3861
3862 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3863   do {                                                                     \
         /* Convert C to its code point in CHARSET, then emit it as one     \
            byte, or as high byte and low byte, according to the           \
            dimension of CHARSET.  */                                         \
3864     int code = ENCODE_CHAR ((charset),(c));                                \
3865                                                                            \
3866     if (CHARSET_DIMENSION (charset) != 1)                                  \
3867       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3868     else                                                                   \
3869       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3870   } while (0)
3871
3872
3873 /* Produce designation and invocation codes at a place pointed by DST
3874    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3875    Return new DST.
        *P_NCHARS is read into the local `produced_chars' on entry and
        written back from it on exit; the emitting macros expanded below
        are presumably what updates that local (they are defined
        elsewhere -- TODO confirm).  */
3876
3877 unsigned char *
3878 encode_invocation_designation (charset, coding, dst, p_nchars)
3879      struct charset *charset;
3880      struct coding_system *coding;
3881      unsigned char *dst;
3882      int *p_nchars;
3883 {
3884   int multibytep = coding->dst_multibyte;
3885   int produced_chars = *p_nchars;
3886   int reg;                      /* graphic register number */
3887   int id = CHARSET_ID (charset);
3888
3889   /* At first, check designations.  */
3890   for (reg = 0; reg < 4; reg++)
3891     if (id == CODING_ISO_DESIGNATION (coding, reg))
3892       break;
3893
3894   if (reg >= 4)
3895     {
3896       /* CHARSET is not yet designated to any graphic registers.  */
3897       /* At first check the requested designation.  */
3898       reg = CODING_ISO_REQUEST (coding, id);
3899       if (reg < 0)
3900         /* Since CHARSET requests no special designation, designate it
3901            to graphic register 0.  */
3902         reg = 0;
3903
3904       ENCODE_DESIGNATION (charset, reg, coding);
3905     }
3906
       /* Here REG is the register that CHARSET is designated to.  */
3907   if (CODING_ISO_INVOCATION (coding, 0) != reg
3908       && CODING_ISO_INVOCATION (coding, 1) != reg)
3909     {
3910       /* Since the graphic register REG is not invoked to any graphic
3911          planes, invoke it to graphic plane 0.  */
3912       switch (reg)
3913         {
3914         case 0:                 /* graphic register 0 */
3915           ENCODE_SHIFT_IN;
3916           break;
3917
3918         case 1:                 /* graphic register 1 */
3919           ENCODE_SHIFT_OUT;
3920           break;
3921
3922         case 2:                 /* graphic register 2 */
               /* A single shift covers only the next character; a
                  locking shift changes the invocation recorded in
                  CODING.  */
3923           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3924             ENCODE_SINGLE_SHIFT_2;
3925           else
3926             ENCODE_LOCKING_SHIFT_2;
3927           break;
3928
3929         case 3:                 /* graphic register 3 */
3930           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3931             ENCODE_SINGLE_SHIFT_3;
3932           else
3933             ENCODE_LOCKING_SHIFT_3;
3934           break;
3935         }
3936     }
3937
3938   *p_nchars = produced_chars;
3939   return dst;
3940 }
3941
3942 /* The following three macros produce codes for indicating direction
3943    of text.  */
3944 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3945   do {                                                                  \
         /* Test the seven-bit flag with `&', as everywhere else: the     \
            flags word may carry other bits as well, so comparing the     \
            whole word with `==' would almost never select ESC [.  */      \
3946     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3947       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3948     else                                                                \
3949       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3950   } while (0)
3951
3952
3953 #define ENCODE_DIRECTION_R2L()                  \
3954   do {                                          \
         /* Emit CSI followed by "2]".  The CSI  \
            macro is object-like, so it takes no \
            argument list.  */                     \
3955     ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
3956     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3957   } while (0)
3958
3959
3960 #define ENCODE_DIRECTION_L2R()                  \
3961   do {                                          \
         /* Emit CSI followed by "0]".  The CSI  \
            macro is object-like, so it takes no \
            argument list.  */                     \
3962     ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
3963     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3964   } while (0)
3965
3966
3967 /* Produce codes for designation and invocation to reset the graphic
3968    planes and registers to initial state.
        Graphic plane 0 is returned to graphic register 0, then every
        register whose initial charset is set (>= 0) and differs from
        its current designation is designated to that charset again.  */
3969 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3970   do {                                                                  \
3971     int reg;                                                            \
3972     struct charset *charset;                                            \
3973                                                                         \
3974     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3975       ENCODE_SHIFT_IN;                                                  \
3976     for (reg = 0; reg < 4; reg++)                                       \
3977       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3978           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3979               != CODING_ISO_INITIAL (coding, reg)))                     \
3980         {                                                               \
3981           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3982           ENCODE_DESIGNATION (charset, reg, coding);                    \
3983         }                                                               \
3984   } while (0)
3985
3986
3987 /* Produce designation sequences of charsets in the line started from
3988    CHARBUF to a place pointed by DST, and return updated DST.  Only
        elements before CHARBUF_END are examined.  Annotations (introduced
        by a negative element) are skipped, and a character that belongs
        to no charset of the coding system is ignored.
3989
3990    If the current block ends before any end-of-line, we may fail to
3991    find all the necessary designations.  */
3992
3993 static unsigned char *
3994 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3995      struct coding_system *coding;
3996      int *charbuf, *charbuf_end;
3997      unsigned char *dst;
3998 {
3999   struct charset *charset;
4000   /* Table of charsets to be designated to each graphic register.  */
4001   int r[4];
4002   int c, found = 0, reg;
4003   int produced_chars = 0;
4004   int multibytep = coding->dst_multibyte;
4005   Lisp_Object attrs;
4006   Lisp_Object charset_list;
4007
4008   attrs = CODING_ID_ATTRS (coding->id);
4009   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4010   if (EQ (charset_list, Qiso_2022))
4011     charset_list = Viso_2022_charset_list;
4012
4013   for (reg = 0; reg < 4; reg++)
4014     r[reg] = -1;
4015
       /* Stop at the end of the block as well as at end-of-line; a
          block without a newline must not be read past its end.  */
4016   while (charbuf < charbuf_end && found < 4)
4017     {
4018       int id;
4019
4020       c = *charbuf++;
4021       if (c == '\n')
4022         break;
           if (c < 0)
             {
               /* An annotation of -C elements in total, laid out as in
                  encode_coding_iso_2022.  Its payload is not character
                  data, so step over the remaining elements.  */
               charbuf += -c - 1;
               continue;
             }
4023       charset = char_charset (c, charset_list, NULL);
           if (! charset)
             /* No charset of this coding system contains C, so it
                requests no designation.  */
             continue;
4024       id = CHARSET_ID (charset);
4025       reg = CODING_ISO_REQUEST (coding, id);
4026       if (reg >= 0 && r[reg] < 0)
4027         {
4028           found++;
4029           r[reg] = id;
4030         }
4031     }
4032
4033   if (found)
4034     {
4035       for (reg = 0; reg < 4; reg++)
4036         if (r[reg] >= 0
4037             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4038           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4039     }
4040
4041   return dst;
4042 }
4043
4044 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
        Encode the CODING->charbuf_used elements of CODING->charbuf as
        ISO 2022 bytes stored from CODING->destination + CODING->produced.
        A negative element introduces an annotation.  Before returning,
        CODING->produced, CODING->produced_char and CODING_ISO_BOL are
        updated.  The only explicit return value is 0; ASSURE_DESTINATION
        is defined elsewhere and may leave the function early (TODO
        confirm).  */
4045
4046 static int
4047 encode_coding_iso_2022 (coding)
4048      struct coding_system *coding;
4049 {
4050   int multibytep = coding->dst_multibyte;
4051   int *charbuf = coding->charbuf;
4052   int *charbuf_end = charbuf + coding->charbuf_used;
4053   unsigned char *dst = coding->destination + coding->produced;
4054   unsigned char *dst_end = coding->destination + coding->dst_bytes;
       /* Amount passed to ASSURE_DESTINATION before each character.  */
4055   int safe_room = 16;
       /* Nonzero when designations must be produced before the next
          character, i.e. we are at the beginning of a line and the
          coding system designates at beginning of line.  */
4056   int bol_designation
4057     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4058        && CODING_ISO_BOL (coding));
4059   int produced_chars = 0;
4060   Lisp_Object attrs, eol_type, charset_list;
4061   int ascii_compatible;
4062   int c;
       /* Charset id taken from the latest charset annotation, or -1.  */
4063   int preferred_charset_id = -1;
4064
4065   CODING_GET_INFO (coding, attrs, charset_list);
4066   eol_type = CODING_ID_EOL_TYPE (coding->id);
       /* A vector EOL type is treated as unix here.  */
4067   if (VECTORP (eol_type))
4068     eol_type = Qunix;
4069
4070   setup_iso_safe_charsets (attrs);
4071   /* Charset list may have been changed.  */
4072   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4073   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
4074
4075   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4076
4077   while (charbuf < charbuf_end)
4078     {
4079       ASSURE_DESTINATION (safe_room);
4080
4081       if (bol_designation)
4082         {
4083           unsigned char *dst_prev = dst;
4084
4085           /* We have to produce designation sequences if any now.  */
4086           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4087           bol_designation = 0;
4088           /* We are sure that designation sequences are all ASCII bytes.  */
4089           produced_chars += dst - dst_prev;
4090         }
4091
4092       c = *charbuf++;
4093
4094       if (c < 0)
4095         {
4096           /* Handle an annotation.  */
               /* CHARBUF already points past C, so *CHARBUF is the
                  annotation kind and the annotation has -C elements in
                  total.  */
4097           switch (*charbuf)
4098             {
4099             case CODING_ANNOTATE_COMPOSITION_MASK:
4100               /* Not yet implemented.  */
4101               break;
4102             case CODING_ANNOTATE_CHARSET_MASK:
                   /* Remember the annotated charset, but only if it is
                      in the charset list of this coding system.  */
4103               preferred_charset_id = charbuf[2];
4104               if (preferred_charset_id >= 0
4105                   && NILP (Fmemq (make_number (preferred_charset_id),
4106                                   charset_list)))
4107                 preferred_charset_id = -1;
4108               break;
4109             default:
4110               abort ();
4111             }
               /* Skip the remaining elements of the annotation.  */
4112           charbuf += -c - 1;
4113           continue;
4114         }
4115
4116       /* Now encode the character C.  */
4117       if (c < 0x20 || c == 0x7F)
4118         {
               /* A control character.  At end of line (newline, or CR
                  for the mac EOL type) the designation state may be
                  reset and BOL designation re-armed.  */
4119           if (c == '\n'
4120               || (c == '\r' && EQ (eol_type, Qmac)))
4121             {
4122               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4123                 ENCODE_RESET_PLANE_AND_REGISTER ();
4124               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4125                 {
4126                   int i;
4127
                       /* Restore the recorded designations without
                          emitting any bytes.  */
4128                   for (i = 0; i < 4; i++)
4129                     CODING_ISO_DESIGNATION (coding, i)
4130                       = CODING_ISO_INITIAL (coding, i);
4131                 }
4132               bol_designation
4133                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4134             }
4135           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4136             ENCODE_RESET_PLANE_AND_REGISTER ();
4137           EMIT_ONE_ASCII_BYTE (c);
4138         }
4139       else if (ASCII_CHAR_P (c))
4140         {
               /* ASCII is emitted as is when the coding system is
                  ASCII compatible, otherwise through the ISO 2022
                  machinery like any other charset.  */
4141           if (ascii_compatible)
4142             EMIT_ONE_ASCII_BYTE (c);
4143           else
4144             {
4145               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4146               ENCODE_ISO_CHARACTER (charset, c);
4147             }
4148         }
4149       else if (CHAR_BYTE8_P (c))
4150         {
               /* A raw-byte character is emitted as its byte value.  */
4151           c = CHAR_TO_BYTE8 (c);
4152           EMIT_ONE_BYTE (c);
4153         }
4154       else
4155         {
4156           struct charset *charset;
4157
               /* Use the preferred charset when it contains C, else
                  look C up in the charset list.  */
4158           if (preferred_charset_id >= 0)
4159             {
4160               charset = CHARSET_FROM_ID (preferred_charset_id);
4161               if (! CHAR_CHARSET_P (c, charset))
4162                 charset = char_charset (c, charset_list, NULL);
4163             }
4164           else
4165             charset = char_charset (c, charset_list, NULL);
4166           if (!charset)
4167             {
                   /* C is not encodable: substitute a replacement
                      character.  */
4168               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4169                 {
4170                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4171                   charset = CHARSET_FROM_ID (charset_ascii);
4172                 }
4173               else
4174                 {
4175                   c = coding->default_char;
                       /* NOTE(review): this lookup is not checked for
                          a null result before use -- confirm that
                          default_char is always encodable.  */
4176                   charset = char_charset (c, charset_list, NULL);
4177                 }
4178             }
4179           ENCODE_ISO_CHARACTER (charset, c);
4180         }
4181     }
4182
       /* At the end of the last block, return to the initial state if
          the coding system resets at end of line.  */
4183   if (coding->mode & CODING_MODE_LAST_BLOCK
4184       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4185     {
4186       ASSURE_DESTINATION (safe_room);
4187       ENCODE_RESET_PLANE_AND_REGISTER ();
4188     }
4189   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4190   CODING_ISO_BOL (coding) = bol_designation;
4191   coding->produced_char += produced_chars;
4192   coding->produced = dst - coding->destination;
4193   return 0;
4194 }
4195
4196 \f
4197 /*** 8. SJIS and BIG5 handlers ***/
4198
4199 /* Although SJIS and BIG5 are not ISO's coding system, they are used
4200    quite widely.  So, for the moment, Emacs supports them in the bare
4201    C code.  But, in the future, they may be supported only by CCL.  */
4202
4203 /* SJIS is a coding system encoding three character sets: ASCII, right
4204    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4205    as is.  A character of charset katakana-jisx0201 is encoded by
4206    "position-code + 0x80".  A character of charset japanese-jisx0208
4207    is encoded in two bytes, but the two position-codes are divided and
4208    shifted so that they fit in the ranges below.
4209
4210    --- CODE RANGE of SJIS ---
4211    (character set)      (range)
4212    ASCII                0x00 .. 0x7F
4213    KATAKANA-JISX0201    0xA0 .. 0xDF
4214    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4215             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4216    -------------------------------
4217
4218 */
4219
4220 /* BIG5 is a coding system encoding two character sets: ASCII and
4221    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4222    character set and is encoded in two bytes.
4223
4224    --- CODE RANGE of BIG5 ---
4225    (character set)      (range)
4226    ASCII                0x00 .. 0x7F
4227    Big5 (1st byte)      0xA1 .. 0xFE
4228         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4229    --------------------------
4230
4231   */
4232
4233 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4234    Check if a text is encoded in SJIS.  CATEGORY_MASK_SJIS is always
4235    added to DETECT_INFO->checked.  If an invalid byte sequence is
        seen, or the last block ends in the middle of a character, the
        mask is added to DETECT_INFO->rejected and 0 is returned.
        Otherwise 1 is returned, and the mask is added to
        DETECT_INFO->found if at least one non-ASCII SJIS sequence was
        seen.  */
4236
4237 static int
4238 detect_coding_sjis (coding, detect_info)
4239      struct coding_system *coding;
4240      struct coding_detection_info *detect_info;
4241 {
4242   const unsigned char *src = coding->source, *src_base;
4243   const unsigned char *src_end = coding->source + coding->src_bytes;
4244   int multibytep = coding->src_multibyte;
4245   int consumed_chars = 0;
4246   int found = 0;
4247   int c;
4248
4249   detect_info->checked |= CATEGORY_MASK_SJIS;
4250   /* A coding system of this category is always ASCII compatible.  */
4251   src += coding->head_ascii;
4252
       /* The loop is left by `break' on an invalid byte.  The label
          no_more_source is not the target of any goto written in this
          function, so it is presumably reached from ONE_MORE_BYTE when
          the source is exhausted (macro defined elsewhere).  */
4253   while (1)
4254     {
4255       src_base = src;
4256       ONE_MORE_BYTE (c);
4257       if (c < 0x80)
4258         continue;
4259       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4260         {
               /* Lead byte of a two-byte character; check the trail
                  byte.  */
4261           ONE_MORE_BYTE (c);
4262           if (c < 0x40 || c == 0x7F || c > 0xFC)
4263             break;
4264           found = CATEGORY_MASK_SJIS;
4265         }
4266       else if (c >= 0xA0 && c < 0xE0)
             /* One-byte character.  */
4267         found = CATEGORY_MASK_SJIS;
4268       else
4269         break;
4270     }
4271   detect_info->rejected |= CATEGORY_MASK_SJIS;
4272   return 0;
4273
4274  no_more_source:
       /* SRC_BASE < SRC means a character was only partly read.  That
          is an error only when no further block will follow.  */
4275   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4276     {
4277       detect_info->rejected |= CATEGORY_MASK_SJIS;
4278       return 0;
4279     }
4280   detect_info->found |= found;
4281   return 1;
4282 }
4283
4284 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4285    Check if a text is encoded in BIG5.  CATEGORY_MASK_BIG5 is always
4286    added to DETECT_INFO->checked.  If an invalid byte sequence is
        seen, or the last block ends in the middle of a character, the
        mask is added to DETECT_INFO->rejected and 0 is returned.
        Otherwise 1 is returned, and the mask is added to
        DETECT_INFO->found if at least one two-byte BIG5 sequence was
        seen.  */
4287
4288 static int
4289 detect_coding_big5 (coding, detect_info)
4290      struct coding_system *coding;
4291      struct coding_detection_info *detect_info;
4292 {
4293   const unsigned char *src = coding->source, *src_base;
4294   const unsigned char *src_end = coding->source + coding->src_bytes;
4295   int multibytep = coding->src_multibyte;
4296   int consumed_chars = 0;
4297   int found = 0;
4298   int c;
4299
4300   detect_info->checked |= CATEGORY_MASK_BIG5;
4301   /* A coding system of this category is always ASCII compatible.  */
4302   src += coding->head_ascii;
4303
4304   while (1)
4305     {
4306       src_base = src;
4307       ONE_MORE_BYTE (c);
4308       if (c < 0x80)
4309         continue;
4310       if (c >= 0xA1)
4311         {
4312           ONE_MORE_BYTE (c);
               /* An invalid trail byte rules BIG5 out just as an
                  invalid lead byte does, so leave through the common
                  rejection path instead of returning without marking
                  the category rejected (detect_coding_sjis does the
                  same).  */
4313           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4314             break;
4315           found = CATEGORY_MASK_BIG5;
4316         }
4317       else
4318         break;
4319     }
4320   detect_info->rejected |= CATEGORY_MASK_BIG5;
4321   return 0;
4322
4323  no_more_source:
       /* SRC_BASE < SRC means a character was only partly read.  That
          is an error only when no further block will follow.  */
4324   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4325     {
4326       detect_info->rejected |= CATEGORY_MASK_BIG5;
4327       return 0;
4328     }
4329   detect_info->found |= found;
4330   return 1;
4331 }
4332
4333 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4334    Decode the SJIS bytes of CODING->source, starting at
        CODING->consumed, into character codes appended to
        CODING->charbuf.  A byte that does not start a valid sequence is
        stored through BYTE8_TO_CHAR and counted in CODING->errors.  On
        return CODING->consumed, CODING->consumed_char and
        CODING->charbuf_used are updated; bytes of a sequence that was
        not completely decoded are left unconsumed.  */
4335
4336 static void
4337 decode_coding_sjis (coding)
4338      struct coding_system *coding;
4339 {
4340   const unsigned char *src = coding->source + coding->consumed;
4341   const unsigned char *src_end = coding->source + coding->src_bytes;
4342   const unsigned char *src_base;
4343   int *charbuf = coding->charbuf + coding->charbuf_used;
4344   int *charbuf_end
4345     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4346   int consumed_chars = 0, consumed_chars_base;
4347   int multibytep = coding->src_multibyte;
4348   struct charset *charset_roman, *charset_kanji, *charset_kana;
4349   struct charset *charset_kanji2;
4350   Lisp_Object attrs, charset_list, val;
4351   int char_offset = coding->produced_char;
4352   int last_offset = char_offset;
4353   int last_id = charset_ascii;
4354   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The byte read ahead after a CR, or -1 if none is pending.  */
4355   int byte_after_cr = -1;
       /* How far SRC and CONSUMED_CHARS advanced while reading
          BYTE_AFTER_CR.  Kept as distances rather than as a saved
          pointer because SRC is handed to CODING_DECODE_CHAR, which
          may adjust it (assumption: macro defined elsewhere).  */
       int cr_lookahead_bytes = 0;
       int cr_lookahead_chars = 0;
4356
4357   CODING_GET_INFO (coding, attrs, charset_list);
4358
       /* The charset list holds roman, kana, kanji and optionally a
          second kanji charset, in that order.  */
4359   val = charset_list;
4360   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4361   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4362   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4363   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4364
4365   while (1)
4366     {
4367       int c, c1;
4368       struct charset *charset;
4369
4370       src_base = src;
4371       consumed_chars_base = consumed_chars;
           if (byte_after_cr >= 0)
             {
               /* The pending look-ahead byte has been read from the
                  source but not decoded yet, so the current sequence
                  starts at that byte.  Without this, stopping on a
                  full buffer, running out of source, or retrying an
                  invalid sequence would lose that byte.  */
               src_base -= cr_lookahead_bytes;
               consumed_chars_base -= cr_lookahead_chars;
             }
4372
4373       if (charbuf >= charbuf_end)
4374         break;
4375
4376       if (byte_after_cr >= 0)
4377         c = byte_after_cr, byte_after_cr = -1;
4378       else
4379         ONE_MORE_BYTE (c);
4380       if (c < 0)
4381         goto invalid_code;
4382       if (c < 0x80)
4383         {
4384           if (eol_crlf && c == '\r')
                 {
                   const unsigned char *src_before = src;
                   int chars_before = consumed_chars;

4385               ONE_MORE_BYTE (byte_after_cr);
                   cr_lookahead_bytes = (int) (src - src_before);
                   cr_lookahead_chars = consumed_chars - chars_before;
                 }
4386           charset = charset_roman;
4387         }
4388       else if (c == 0x80 || c == 0xA0)
4389         goto invalid_code;
4390       else if (c >= 0xA1 && c <= 0xDF)
4391         {
4392           /* SJIS -> JISX0201-Kana */
4393           c &= 0x7F;
4394           charset = charset_kana;
4395         }
4396       else if (c <= 0xEF)
4397         {
4398           /* SJIS -> JISX0208 */
4399           ONE_MORE_BYTE (c1);
4400           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4401             goto invalid_code;
4402           c = (c << 8) | c1;
4403           SJIS_TO_JIS (c);
4404           charset = charset_kanji;
4405         }
4406       else if (c <= 0xFC && charset_kanji2)
4407         {
4408           /* SJIS -> JISX0213-2 */
4409           ONE_MORE_BYTE (c1);
4410           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4411             goto invalid_code;
4412           c = (c << 8) | c1;
4413           SJIS_TO_JIS2 (c);
4414           charset = charset_kanji2;
4415         }
4416       else
4417         goto invalid_code;
           /* Record a charset annotation for the run that just ended
              whenever the non-ASCII charset changes.  */
4418       if (charset->id != charset_ascii
4419           && last_id != charset->id)
4420         {
4421           if (last_id != charset_ascii)
4422             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4423           last_id = charset->id;
4424           last_offset = char_offset;
4425         }
4426       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4427       *charbuf++ = c;
4428       char_offset++;
4429       continue;
4430
4431     invalid_code:
           /* Go back to the start of the sequence and pass its first
              byte through as a raw byte.  */
4432       src = src_base;
4433       consumed_chars = consumed_chars_base;
4434       ONE_MORE_BYTE (c);
4435       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4436       char_offset++;
4437       coding->errors++;
4438     }
4439
4440  no_more_source:
4441   if (last_id != charset_ascii)
4442     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4443   coding->consumed_char += consumed_chars_base;
4444   coding->consumed = src_base - coding->source;
4445   coding->charbuf_used = charbuf - coding->charbuf;
4446 }
4447
4448 static void
4449 decode_coding_big5 (coding)
4450      struct coding_system *coding;
4451 {
       /* Decode the BIG5 bytes of CODING->source, starting at
          CODING->consumed, into character codes appended to
          CODING->charbuf.  A byte that does not start a valid sequence
          is stored through BYTE8_TO_CHAR and counted in CODING->errors.
          On return CODING->consumed, CODING->consumed_char and
          CODING->charbuf_used are updated; bytes of a sequence that was
          not completely decoded are left unconsumed.  */
4452   const unsigned char *src = coding->source + coding->consumed;
4453   const unsigned char *src_end = coding->source + coding->src_bytes;
4454   const unsigned char *src_base;
4455   int *charbuf = coding->charbuf + coding->charbuf_used;
4456   int *charbuf_end
4457     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4458   int consumed_chars = 0, consumed_chars_base;
4459   int multibytep = coding->src_multibyte;
4460   struct charset *charset_roman, *charset_big5;
4461   Lisp_Object attrs, charset_list, val;
4462   int char_offset = coding->produced_char;
4463   int last_offset = char_offset;
4464   int last_id = charset_ascii;
4465   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* The byte read ahead after a CR, or -1 if none is pending.  */
4466   int byte_after_cr = -1;
       /* How far SRC and CONSUMED_CHARS advanced while reading
          BYTE_AFTER_CR.  Kept as distances rather than as a saved
          pointer because SRC is handed to CODING_DECODE_CHAR, which
          may adjust it (assumption: macro defined elsewhere).  */
       int cr_lookahead_bytes = 0;
       int cr_lookahead_chars = 0;
4467
4468   CODING_GET_INFO (coding, attrs, charset_list);
       /* The charset list holds the roman charset, then Big5.  */
4469   val = charset_list;
4470   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4471   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4472
4473   while (1)
4474     {
4475       int c, c1;
4476       struct charset *charset;
4477
4478       src_base = src;
4479       consumed_chars_base = consumed_chars;
           if (byte_after_cr >= 0)
             {
               /* The pending look-ahead byte has been read from the
                  source but not decoded yet, so the current sequence
                  starts at that byte.  Without this, stopping on a
                  full buffer, running out of source, or retrying an
                  invalid sequence would lose that byte.  */
               src_base -= cr_lookahead_bytes;
               consumed_chars_base -= cr_lookahead_chars;
             }
4480
4481       if (charbuf >= charbuf_end)
4482         break;
4483
4484       if (byte_after_cr >= 0)
4485         c = byte_after_cr, byte_after_cr = -1;
4486       else
4487         ONE_MORE_BYTE (c);
4488
4489       if (c < 0)
4490         goto invalid_code;
4491       if (c < 0x80)
4492         {
4493           if (eol_crlf && c == '\r')
                 {
                   const unsigned char *src_before = src;
                   int chars_before = consumed_chars;

4494               ONE_MORE_BYTE (byte_after_cr);
                   cr_lookahead_bytes = (int) (src - src_before);
                   cr_lookahead_chars = consumed_chars - chars_before;
                 }
4495           charset = charset_roman;
4496         }
4497       else
4498         {
4499           /* BIG5 -> Big5 */
4500           if (c < 0xA1 || c > 0xFE)
4501             goto invalid_code;
4502           ONE_MORE_BYTE (c1);
4503           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4504             goto invalid_code;
4505           c = c << 8 | c1;
4506           charset = charset_big5;
4507         }
           /* Record a charset annotation for the run that just ended
              whenever the non-ASCII charset changes.  */
4508       if (charset->id != charset_ascii
4509           && last_id != charset->id)
4510         {
4511           if (last_id != charset_ascii)
4512             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4513           last_id = charset->id;
4514           last_offset = char_offset;
4515         }
4516       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4517       *charbuf++ = c;
4518       char_offset++;
4519       continue;
4520
4521     invalid_code:
           /* Go back to the start of the sequence and pass its first
              byte through as a raw byte.  */
4522       src = src_base;
4523       consumed_chars = consumed_chars_base;
4524       ONE_MORE_BYTE (c);
4525       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4526       char_offset++;
4527       coding->errors++;
4528     }
4529
4530  no_more_source:
4531   if (last_id != charset_ascii)
4532     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4533   coding->consumed_char += consumed_chars_base;
4534   coding->consumed = src_base - coding->source;
4535   coding->charbuf_used = charbuf - coding->charbuf;
4536 }
4537
4538 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4539    This function can encode charsets `ascii', `katakana-jisx0201',
4540    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4541    are sure that all these charsets are registered as official charset
4542    (i.e. do not have extended leading-codes).  Characters of other
4543    charsets are produced without any encoding.  If SJIS_P is 1, encode
4544    SJIS text, else encode BIG5 text.  */
4545
4546 static int
4547 encode_coding_sjis (coding)
4548      struct coding_system *coding;
4549 {
4550   int multibytep = coding->dst_multibyte;
4551   int *charbuf = coding->charbuf;
4552   int *charbuf_end = charbuf + coding->charbuf_used;
4553   unsigned char *dst = coding->destination + coding->produced;
4554   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4555   int safe_room = 4;
4556   int produced_chars = 0;
4557   Lisp_Object attrs, charset_list, val;
4558   int ascii_compatible;
4559   struct charset *charset_roman, *charset_kanji, *charset_kana;
4560   struct charset *charset_kanji2;
4561   int c;
4562
4563   CODING_GET_INFO (coding, attrs, charset_list);
4564   val = charset_list;
4565   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4566   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4567   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4568   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4569
4570   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4571
4572   while (charbuf < charbuf_end)
4573     {
4574       ASSURE_DESTINATION (safe_room);
4575       c = *charbuf++;
4576       /* Now encode the character C.  */
4577       if (ASCII_CHAR_P (c) && ascii_compatible)
4578         EMIT_ONE_ASCII_BYTE (c);
4579       else if (CHAR_BYTE8_P (c))
4580         {
4581           c = CHAR_TO_BYTE8 (c);
4582           EMIT_ONE_BYTE (c);
4583         }
4584       else
4585         {
4586           unsigned code;
4587           struct charset *charset = char_charset (c, charset_list, &code);
4588
4589           if (!charset)
4590             {
4591               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4592                 {
4593                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4594                   charset = CHARSET_FROM_ID (charset_ascii);
4595                 }
4596               else
4597                 {
4598                   c = coding->default_char;
4599                   charset = char_charset (c, charset_list, &code);
4600                 }
4601             }
4602           if (code == CHARSET_INVALID_CODE (charset))
4603             abort ();
4604           if (charset == charset_kanji)
4605             {
4606               int c1, c2;
4607               JIS_TO_SJIS (code);
4608               c1 = code >> 8, c2 = code & 0xFF;
4609               EMIT_TWO_BYTES (c1, c2);
4610             }
4611           else if (charset == charset_kana)
4612             EMIT_ONE_BYTE (code | 0x80);
4613           else if (charset_kanji2 && charset == charset_kanji2)
4614             {
4615               int c1, c2;
4616
4617               c1 = code >> 8;
4618               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4619                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4620                 {
4621                   JIS_TO_SJIS2 (code);
4622                   c1 = code >> 8, c2 = code & 0xFF;
4623                   EMIT_TWO_BYTES (c1, c2);
4624                 }
4625               else
4626                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4627             }
4628           else
4629             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4630         }
4631     }
4632   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4633   coding->produced_char += produced_chars;
4634   coding->produced = dst - coding->destination;
4635   return 0;
4636 }
4637
4638 static int
4639 encode_coding_big5 (coding)
4640      struct coding_system *coding;
4641 {
4642   int multibytep = coding->dst_multibyte;
4643   int *charbuf = coding->charbuf;
4644   int *charbuf_end = charbuf + coding->charbuf_used;
4645   unsigned char *dst = coding->destination + coding->produced;
4646   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4647   int safe_room = 4;
4648   int produced_chars = 0;
4649   Lisp_Object attrs, charset_list, val;
4650   int ascii_compatible;
4651   struct charset *charset_roman, *charset_big5;
4652   int c;
4653
4654   CODING_GET_INFO (coding, attrs, charset_list);
4655   val = charset_list;
4656   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4657   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4658   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4659
4660   while (charbuf < charbuf_end)
4661     {
4662       ASSURE_DESTINATION (safe_room);
4663       c = *charbuf++;
4664       /* Now encode the character C.  */
4665       if (ASCII_CHAR_P (c) && ascii_compatible)
4666         EMIT_ONE_ASCII_BYTE (c);
4667       else if (CHAR_BYTE8_P (c))
4668         {
4669           c = CHAR_TO_BYTE8 (c);
4670           EMIT_ONE_BYTE (c);
4671         }
4672       else
4673         {
4674           unsigned code;
4675           struct charset *charset = char_charset (c, charset_list, &code);
4676
4677           if (! charset)
4678             {
4679               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4680                 {
4681                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4682                   charset = CHARSET_FROM_ID (charset_ascii);
4683                 }
4684               else
4685                 {
4686                   c = coding->default_char;
4687                   charset = char_charset (c, charset_list, &code);
4688                 }
4689             }
4690           if (code == CHARSET_INVALID_CODE (charset))
4691             abort ();
4692           if (charset == charset_big5)
4693             {
4694               int c1, c2;
4695
4696               c1 = code >> 8, c2 = code & 0xFF;
4697               EMIT_TWO_BYTES (c1, c2);
4698             }
4699           else
4700             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4701         }
4702     }
4703   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4704   coding->produced_char += produced_chars;
4705   coding->produced = dst - coding->destination;
4706   return 0;
4707 }
4708
4709 \f
4710 /*** 9. CCL handlers ***/
4711
4712 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4713    Check if a text is encoded in a coding system of which
4714    encoder/decoder are written in CCL program.  If it is, return
4715    CATEGORY_MASK_CCL, else return 0.  */
4716
4717 static int
4718 detect_coding_ccl (coding, detect_info)
4719      struct coding_system *coding;
4720      struct coding_detection_info *detect_info;
4721 {
4722   const unsigned char *src = coding->source, *src_base;
4723   const unsigned char *src_end = coding->source + coding->src_bytes;
4724   int multibytep = coding->src_multibyte;
4725   int consumed_chars = 0;
4726   int found = 0;
4727   unsigned char *valids;
4728   int head_ascii = coding->head_ascii;
4729   Lisp_Object attrs;
4730
4731   detect_info->checked |= CATEGORY_MASK_CCL;
4732
4733   coding = &coding_categories[coding_category_ccl];
4734   valids = CODING_CCL_VALIDS (coding);
4735   attrs = CODING_ID_ATTRS (coding->id);
4736   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4737     src += head_ascii;
4738
4739   while (1)
4740     {
4741       int c;
4742
4743       src_base = src;
4744       ONE_MORE_BYTE (c);
4745       if (c < 0 || ! valids[c])
4746         break;
4747       if ((valids[c] > 1))
4748         found = CATEGORY_MASK_CCL;
4749     }
4750   detect_info->rejected |= CATEGORY_MASK_CCL;
4751   return 0;
4752
4753  no_more_source:
4754   detect_info->found |= found;
4755   return 1;
4756 }
4757
4758 static void
4759 decode_coding_ccl (coding)
4760      struct coding_system *coding;
4761 {
4762   const unsigned char *src = coding->source + coding->consumed;
4763   const unsigned char *src_end = coding->source + coding->src_bytes;
4764   int *charbuf = coding->charbuf + coding->charbuf_used;
4765   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4766   int consumed_chars = 0;
4767   int multibytep = coding->src_multibyte;
4768   struct ccl_program ccl;
4769   int source_charbuf[1024];
4770   int source_byteidx[1024];
4771   Lisp_Object attrs, charset_list;
4772
4773   CODING_GET_INFO (coding, attrs, charset_list);
4774   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4775
4776   while (src < src_end)
4777     {
4778       const unsigned char *p = src;
4779       int *source, *source_end;
4780       int i = 0;
4781
4782       if (multibytep)
4783         while (i < 1024 && p < src_end)
4784           {
4785             source_byteidx[i] = p - src;
4786             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4787           }
4788       else
4789         while (i < 1024 && p < src_end)
4790           source_charbuf[i++] = *p++;
4791
4792       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4793         ccl.last_block = 1;
4794
4795       source = source_charbuf;
4796       source_end = source + i;
4797       while (source < source_end)
4798         {
4799           ccl_driver (&ccl, source, charbuf,
4800                       source_end - source, charbuf_end - charbuf,
4801                       charset_list);
4802           source += ccl.consumed;
4803           charbuf += ccl.produced;
4804           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4805             break;
4806         }
4807       if (source < source_end)
4808         src += source_byteidx[source - source_charbuf];
4809       else
4810         src = p;
4811       consumed_chars += source - source_charbuf;
4812
4813       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4814           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4815         break;
4816     }
4817
4818   switch (ccl.status)
4819     {
4820     case CCL_STAT_SUSPEND_BY_SRC:
4821       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4822       break;
4823     case CCL_STAT_SUSPEND_BY_DST:
4824       break;
4825     case CCL_STAT_QUIT:
4826     case CCL_STAT_INVALID_CMD:
4827       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4828       break;
4829     default:
4830       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4831       break;
4832     }
4833   coding->consumed_char += consumed_chars;
4834   coding->consumed = src - coding->source;
4835   coding->charbuf_used = charbuf - coding->charbuf;
4836 }
4837
4838 static int
4839 encode_coding_ccl (coding)
4840      struct coding_system *coding;
4841 {
4842   struct ccl_program ccl;
4843   int multibytep = coding->dst_multibyte;
4844   int *charbuf = coding->charbuf;
4845   int *charbuf_end = charbuf + coding->charbuf_used;
4846   unsigned char *dst = coding->destination + coding->produced;
4847   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4848   int destination_charbuf[1024];
4849   int i, produced_chars = 0;
4850   Lisp_Object attrs, charset_list;
4851
4852   CODING_GET_INFO (coding, attrs, charset_list);
4853   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4854
4855   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4856   ccl.dst_multibyte = coding->dst_multibyte;
4857
4858   while (charbuf < charbuf_end)
4859     {
4860       ccl_driver (&ccl, charbuf, destination_charbuf,
4861                   charbuf_end - charbuf, 1024, charset_list);
4862       if (multibytep)
4863         {
4864           ASSURE_DESTINATION (ccl.produced * 2);
4865           for (i = 0; i < ccl.produced; i++)
4866             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4867         }
4868       else
4869         {
4870           ASSURE_DESTINATION (ccl.produced);
4871           for (i = 0; i < ccl.produced; i++)
4872             *dst++ = destination_charbuf[i] & 0xFF;
4873           produced_chars += ccl.produced;
4874         }
4875       charbuf += ccl.consumed;
4876       if (ccl.status == CCL_STAT_QUIT
4877           || ccl.status == CCL_STAT_INVALID_CMD)
4878         break;
4879     }
4880
4881   switch (ccl.status)
4882     {
4883     case CCL_STAT_SUSPEND_BY_SRC:
4884       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4885       break;
4886     case CCL_STAT_SUSPEND_BY_DST:
4887       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4888       break;
4889     case CCL_STAT_QUIT:
4890     case CCL_STAT_INVALID_CMD:
4891       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4892       break;
4893     default:
4894       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4895       break;
4896     }
4897
4898   coding->produced_char += produced_chars;
4899   coding->produced = dst - coding->destination;
4900   return 0;
4901 }
4902
4903
4904 \f
4905 /*** 10, 11. no-conversion handlers ***/
4906
4907 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4908
4909 static void
4910 decode_coding_raw_text (coding)
4911      struct coding_system *coding;
4912 {
4913   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4914
4915   coding->chars_at_source = 1;
4916   coding->consumed_char = coding->src_chars;
4917   coding->consumed = coding->src_bytes;
4918   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4919     {
4920       coding->consumed_char--;
4921       coding->consumed--;
4922       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4923     }
4924   else
4925     record_conversion_result (coding, CODING_RESULT_SUCCESS);
4926 }
4927
4928 static int
4929 encode_coding_raw_text (coding)
4930      struct coding_system *coding;
4931 {
4932   int multibytep = coding->dst_multibyte;
4933   int *charbuf = coding->charbuf;
4934   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4935   unsigned char *dst = coding->destination + coding->produced;
4936   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4937   int produced_chars = 0;
4938   int c;
4939
4940   if (multibytep)
4941     {
4942       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4943
4944       if (coding->src_multibyte)
4945         while (charbuf < charbuf_end)
4946           {
4947             ASSURE_DESTINATION (safe_room);
4948             c = *charbuf++;
4949             if (ASCII_CHAR_P (c))
4950               EMIT_ONE_ASCII_BYTE (c);
4951             else if (CHAR_BYTE8_P (c))
4952               {
4953                 c = CHAR_TO_BYTE8 (c);
4954                 EMIT_ONE_BYTE (c);
4955               }
4956             else
4957               {
4958                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4959
4960                 CHAR_STRING_ADVANCE (c, p1);
4961                 while (p0 < p1)
4962                   {
4963                     EMIT_ONE_BYTE (*p0);
4964                     p0++;
4965                   }
4966               }
4967           }
4968       else
4969         while (charbuf < charbuf_end)
4970           {
4971             ASSURE_DESTINATION (safe_room);
4972             c = *charbuf++;
4973             EMIT_ONE_BYTE (c);
4974           }
4975     }
4976   else
4977     {
4978       if (coding->src_multibyte)
4979         {
4980           int safe_room = MAX_MULTIBYTE_LENGTH;
4981
4982           while (charbuf < charbuf_end)
4983             {
4984               ASSURE_DESTINATION (safe_room);
4985               c = *charbuf++;
4986               if (ASCII_CHAR_P (c))
4987                 *dst++ = c;
4988               else if (CHAR_BYTE8_P (c))
4989                 *dst++ = CHAR_TO_BYTE8 (c);
4990               else
4991                 CHAR_STRING_ADVANCE (c, dst);
4992             }
4993         }
4994       else
4995         {
4996           ASSURE_DESTINATION (charbuf_end - charbuf);
4997           while (charbuf < charbuf_end && dst < dst_end)
4998             *dst++ = *charbuf++;
4999         }
5000       produced_chars = dst - (coding->destination + coding->produced);
5001     }
5002   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5003   coding->produced_char += produced_chars;
5004   coding->produced = dst - coding->destination;
5005   return 0;
5006 }
5007
5008 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5009    Check if a text is encoded in a charset-based coding system.  If it
5010    is, return 1, else return 0.  */
5011
5012 static int
5013 detect_coding_charset (coding, detect_info)
5014      struct coding_system *coding;
5015      struct coding_detection_info *detect_info;
5016 {
5017   const unsigned char *src = coding->source, *src_base;
5018   const unsigned char *src_end = coding->source + coding->src_bytes;
5019   int multibytep = coding->src_multibyte;
5020   int consumed_chars = 0;
5021   Lisp_Object attrs, valids, name;
5022   int found = 0;
5023   int head_ascii = coding->head_ascii;
5024   int check_latin_extra = 0;
5025
5026   detect_info->checked |= CATEGORY_MASK_CHARSET;
5027
5028   coding = &coding_categories[coding_category_charset];
5029   attrs = CODING_ID_ATTRS (coding->id);
5030   valids = AREF (attrs, coding_attr_charset_valids);
5031   name = CODING_ID_NAME (coding->id);
5032   if (VECTORP (Vlatin_extra_code_table)
5033       && strcmp ((char *) SDATA (SYMBOL_NAME (name)), "iso-8859-"))
5034     check_latin_extra = 1;
5035   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5036     src += head_ascii;
5037
5038   while (1)
5039     {
5040       int c;
5041       Lisp_Object val;
5042       struct charset *charset;
5043       int dim, idx;
5044
5045       src_base = src;
5046       ONE_MORE_BYTE (c);
5047       if (c < 0)
5048         continue;
5049       val = AREF (valids, c);
5050       if (NILP (val))
5051         break;
5052       if (c >= 0x80)
5053         {
5054           if (c < 0xA0
5055               && check_latin_extra
5056               && NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
5057             break;
5058           found = CATEGORY_MASK_CHARSET;
5059         }
5060       if (INTEGERP (val))
5061         {
5062           charset = CHARSET_FROM_ID (XFASTINT (val));
5063           dim = CHARSET_DIMENSION (charset);
5064           for (idx = 1; idx < dim; idx++)
5065             {
5066               if (src == src_end)
5067                 goto too_short;
5068               ONE_MORE_BYTE (c);
5069               if (c < charset->code_space[(dim - 1 - idx) * 2]
5070                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5071                 break;
5072             }
5073           if (idx < dim)
5074             break;
5075         }
5076       else
5077         {
5078           idx = 1;
5079           for (; CONSP (val); val = XCDR (val))
5080             {
5081               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5082               dim = CHARSET_DIMENSION (charset);
5083               while (idx < dim)
5084                 {
5085                   if (src == src_end)
5086                     goto too_short;
5087                   ONE_MORE_BYTE (c);
5088                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5089                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5090                     break;
5091                   idx++;
5092                 }
5093               if (idx == dim)
5094                 {
5095                   val = Qnil;
5096                   break;
5097                 }
5098             }
5099           if (CONSP (val))
5100             break;
5101         }
5102     }
5103  too_short:
5104   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5105   return 0;
5106
5107  no_more_source:
5108   detect_info->found |= found;
5109   return 1;
5110 }
5111
5112 static void
5113 decode_coding_charset (coding)
5114      struct coding_system *coding;
5115 {
5116   const unsigned char *src = coding->source + coding->consumed;
5117   const unsigned char *src_end = coding->source + coding->src_bytes;
5118   const unsigned char *src_base;
5119   int *charbuf = coding->charbuf + coding->charbuf_used;
5120   int *charbuf_end
5121     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
5122   int consumed_chars = 0, consumed_chars_base;
5123   int multibytep = coding->src_multibyte;
5124   Lisp_Object attrs, charset_list, valids;
5125   int char_offset = coding->produced_char;
5126   int last_offset = char_offset;
5127   int last_id = charset_ascii;
5128   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5129   int byte_after_cr = -1;
5130
5131   CODING_GET_INFO (coding, attrs, charset_list);
5132   valids = AREF (attrs, coding_attr_charset_valids);
5133
5134   while (1)
5135     {
5136       int c;
5137       Lisp_Object val;
5138       struct charset *charset;
5139       int dim;
5140       int len = 1;
5141       unsigned code;
5142
5143       src_base = src;
5144       consumed_chars_base = consumed_chars;
5145
5146       if (charbuf >= charbuf_end)
5147         break;
5148
5149       if (byte_after_cr >= 0)
5150         {
5151           c = byte_after_cr;
5152           byte_after_cr = -1;
5153         }
5154       else
5155         {
5156           ONE_MORE_BYTE (c);
5157           if (eol_crlf && c == '\r')
5158             ONE_MORE_BYTE (byte_after_cr);
5159         }
5160       if (c < 0)
5161         goto invalid_code;
5162       code = c;
5163
5164       val = AREF (valids, c);
5165       if (! INTEGERP (val) && ! CONSP (val))
5166         goto invalid_code;
5167       if (INTEGERP (val))
5168         {
5169           charset = CHARSET_FROM_ID (XFASTINT (val));
5170           dim = CHARSET_DIMENSION (charset);
5171           while (len < dim)
5172             {
5173               ONE_MORE_BYTE (c);
5174               code = (code << 8) | c;
5175               len++;
5176             }
5177           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5178                               charset, code, c);
5179         }
5180       else
5181         {
5182           /* VAL is a list of charset IDs.  It is assured that the
5183              list is sorted by charset dimensions (smaller one
5184              comes first).  */
5185           while (CONSP (val))
5186             {
5187               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5188               dim = CHARSET_DIMENSION (charset);
5189               while (len < dim)
5190                 {
5191                   ONE_MORE_BYTE (c);
5192                   code = (code << 8) | c;
5193                   len++;
5194                 }
5195               CODING_DECODE_CHAR (coding, src, src_base,
5196                                   src_end, charset, code, c);
5197               if (c >= 0)
5198                 break;
5199               val = XCDR (val);
5200             }
5201         }
5202       if (c < 0)
5203         goto invalid_code;
5204       if (charset->id != charset_ascii
5205           && last_id != charset->id)
5206         {
5207           if (last_id != charset_ascii)
5208             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5209           last_id = charset->id;
5210           last_offset = char_offset;
5211         }
5212
5213       *charbuf++ = c;
5214       char_offset++;
5215       continue;
5216
5217     invalid_code:
5218       src = src_base;
5219       consumed_chars = consumed_chars_base;
5220       ONE_MORE_BYTE (c);
5221       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5222       char_offset++;
5223       coding->errors++;
5224     }
5225
5226  no_more_source:
5227   if (last_id != charset_ascii)
5228     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5229   coding->consumed_char += consumed_chars_base;
5230   coding->consumed = src_base - coding->source;
5231   coding->charbuf_used = charbuf - coding->charbuf;
5232 }
5233
5234 static int
5235 encode_coding_charset (coding)
5236      struct coding_system *coding;
5237 {
5238   int multibytep = coding->dst_multibyte;
5239   int *charbuf = coding->charbuf;
5240   int *charbuf_end = charbuf + coding->charbuf_used;
5241   unsigned char *dst = coding->destination + coding->produced;
5242   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5243   int safe_room = MAX_MULTIBYTE_LENGTH;
5244   int produced_chars = 0;
5245   Lisp_Object attrs, charset_list;
5246   int ascii_compatible;
5247   int c;
5248
5249   CODING_GET_INFO (coding, attrs, charset_list);
5250   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5251
5252   while (charbuf < charbuf_end)
5253     {
5254       struct charset *charset;
5255       unsigned code;
5256
5257       ASSURE_DESTINATION (safe_room);
5258       c = *charbuf++;
5259       if (ascii_compatible && ASCII_CHAR_P (c))
5260         EMIT_ONE_ASCII_BYTE (c);
5261       else if (CHAR_BYTE8_P (c))
5262         {
5263           c = CHAR_TO_BYTE8 (c);
5264           EMIT_ONE_BYTE (c);
5265         }
5266       else
5267         {
5268           charset = char_charset (c, charset_list, &code);
5269           if (charset)
5270             {
5271               if (CHARSET_DIMENSION (charset) == 1)
5272                 EMIT_ONE_BYTE (code);
5273               else if (CHARSET_DIMENSION (charset) == 2)
5274                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5275               else if (CHARSET_DIMENSION (charset) == 3)
5276                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5277               else
5278                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5279                                  (code >> 8) & 0xFF, code & 0xFF);
5280             }
5281           else
5282             {
5283               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5284                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5285               else
5286                 c = coding->default_char;
5287               EMIT_ONE_BYTE (c);
5288             }
5289         }
5290     }
5291
5292   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5293   coding->produced_char += produced_chars;
5294   coding->produced = dst - coding->destination;
5295   return 0;
5296 }
5297
5298 \f
5299 /*** 10. C library functions ***/
5300
5301 /* Setup coding context CODING from information about CODING_SYSTEM.
5302    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5303    CODING_SYSTEM is invalid, signal an error.  */
5304
5305 void
5306 setup_coding_system (coding_system, coding)
5307      Lisp_Object coding_system;
5308      struct coding_system *coding;
5309 {
5310   Lisp_Object attrs;
5311   Lisp_Object eol_type;
5312   Lisp_Object coding_type;
5313   Lisp_Object val;
5314
5315   if (NILP (coding_system))
5316     coding_system = Qundecided;
5317
5318   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5319
5320   attrs = CODING_ID_ATTRS (coding->id);
5321   eol_type = CODING_ID_EOL_TYPE (coding->id);
5322
5323   coding->mode = 0;
5324   coding->head_ascii = -1;
5325   if (VECTORP (eol_type))
5326     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5327                             | CODING_REQUIRE_DETECTION_MASK);
5328   else if (! EQ (eol_type, Qunix))
5329     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5330                             | CODING_REQUIRE_ENCODING_MASK);
5331   else
5332     coding->common_flags = 0;
5333   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5334     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5335   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5336     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5337   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5338     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5339
5340   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5341   coding->max_charset_id = SCHARS (val) - 1;
5342   coding->safe_charsets = (char *) SDATA (val);
5343   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5344
5345   coding_type = CODING_ATTR_TYPE (attrs);
5346   if (EQ (coding_type, Qundecided))
5347     {
5348       coding->detector = NULL;
5349       coding->decoder = decode_coding_raw_text;
5350       coding->encoder = encode_coding_raw_text;
5351       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5352     }
5353   else if (EQ (coding_type, Qiso_2022))
5354     {
5355       int i;
5356       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5357
5358       /* Invoke graphic register 0 to plane 0.  */
5359       CODING_ISO_INVOCATION (coding, 0) = 0;
5360       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5361       CODING_ISO_INVOCATION (coding, 1)
5362         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5363       /* Setup the initial status of designation.  */
5364       for (i = 0; i < 4; i++)
5365         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5366       /* Not single shifting initially.  */
5367       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5368       /* Beginning of buffer should also be regarded as bol. */
5369       CODING_ISO_BOL (coding) = 1;
5370       coding->detector = detect_coding_iso_2022;
5371       coding->decoder = decode_coding_iso_2022;
5372       coding->encoder = encode_coding_iso_2022;
5373       if (flags & CODING_ISO_FLAG_SAFE)
5374         coding->mode |= CODING_MODE_SAFE_ENCODING;
5375       coding->common_flags
5376         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5377             | CODING_REQUIRE_FLUSHING_MASK);
5378       if (flags & CODING_ISO_FLAG_COMPOSITION)
5379         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5380       if (flags & CODING_ISO_FLAG_DESIGNATION)
5381         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5382       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5383         {
5384           setup_iso_safe_charsets (attrs);
5385           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5386           coding->max_charset_id = SCHARS (val) - 1;
5387           coding->safe_charsets = (char *) SDATA (val);
5388         }
5389       CODING_ISO_FLAGS (coding) = flags;
5390     }
5391   else if (EQ (coding_type, Qcharset))
5392     {
5393       coding->detector = detect_coding_charset;
5394       coding->decoder = decode_coding_charset;
5395       coding->encoder = encode_coding_charset;
5396       coding->common_flags
5397         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5398     }
5399   else if (EQ (coding_type, Qutf_8))
5400     {
5401       val = AREF (attrs, coding_attr_utf_bom);
5402       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5403                                    : EQ (val, Qt) ? utf_with_bom
5404                                    : utf_without_bom);
5405       coding->detector = detect_coding_utf_8;
5406       coding->decoder = decode_coding_utf_8;
5407       coding->encoder = encode_coding_utf_8;
5408       coding->common_flags
5409         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5410       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5411         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5412     }
5413   else if (EQ (coding_type, Qutf_16))
5414     {
5415       val = AREF (attrs, coding_attr_utf_bom);
5416       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5417                                     : EQ (val, Qt) ? utf_with_bom
5418                                     : utf_without_bom);
5419       val = AREF (attrs, coding_attr_utf_16_endian);
5420       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5421                                        : utf_16_little_endian);
5422       CODING_UTF_16_SURROGATE (coding) = 0;
5423       coding->detector = detect_coding_utf_16;
5424       coding->decoder = decode_coding_utf_16;
5425       coding->encoder = encode_coding_utf_16;
5426       coding->common_flags
5427         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5428       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5429         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5430     }
5431   else if (EQ (coding_type, Qccl))
5432     {
5433       coding->detector = detect_coding_ccl;
5434       coding->decoder = decode_coding_ccl;
5435       coding->encoder = encode_coding_ccl;
5436       coding->common_flags
5437         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5438             | CODING_REQUIRE_FLUSHING_MASK);
5439     }
5440   else if (EQ (coding_type, Qemacs_mule))
5441     {
5442       coding->detector = detect_coding_emacs_mule;
5443       coding->decoder = decode_coding_emacs_mule;
5444       coding->encoder = encode_coding_emacs_mule;
5445       coding->common_flags
5446         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5447       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5448           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5449         {
5450           Lisp_Object tail, safe_charsets;
5451           int max_charset_id = 0;
5452
5453           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5454                tail = XCDR (tail))
5455             if (max_charset_id < XFASTINT (XCAR (tail)))
5456               max_charset_id = XFASTINT (XCAR (tail));
5457           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5458                                         make_number (255));
5459           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5460                tail = XCDR (tail))
5461             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5462           coding->max_charset_id = max_charset_id;
5463           coding->safe_charsets = (char *) SDATA (safe_charsets);
5464         }
5465     }
5466   else if (EQ (coding_type, Qshift_jis))
5467     {
5468       coding->detector = detect_coding_sjis;
5469       coding->decoder = decode_coding_sjis;
5470       coding->encoder = encode_coding_sjis;
5471       coding->common_flags
5472         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5473     }
5474   else if (EQ (coding_type, Qbig5))
5475     {
5476       coding->detector = detect_coding_big5;
5477       coding->decoder = decode_coding_big5;
5478       coding->encoder = encode_coding_big5;
5479       coding->common_flags
5480         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5481     }
5482   else                          /* EQ (coding_type, Qraw_text) */
5483     {
5484       coding->detector = NULL;
5485       coding->decoder = decode_coding_raw_text;
5486       coding->encoder = encode_coding_raw_text;
5487       if (! EQ (eol_type, Qunix))
5488         {
5489           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5490           if (! VECTORP (eol_type))
5491             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5492         }
5493
5494     }
5495
5496   return;
5497 }
5498
5499 /* Return a list of charsets supported by CODING.  */
5500
5501 Lisp_Object
5502 coding_charset_list (coding)
5503      struct coding_system *coding;
5504 {
5505   Lisp_Object attrs, charset_list;
5506
5507   CODING_GET_INFO (coding, attrs, charset_list);
5508   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5509     {
5510       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5511
5512       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5513         charset_list = Viso_2022_charset_list;
5514     }
5515   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5516     {
5517       charset_list = Vemacs_mule_charset_list;
5518     }
5519   return charset_list;
5520 }
5521
5522
5523 /* Return raw-text or one of its subsidiaries that has the same
5524    eol_type as CODING-SYSTEM.  */
5525
5526 Lisp_Object
5527 raw_text_coding_system (coding_system)
5528      Lisp_Object coding_system;
5529 {
5530   Lisp_Object spec, attrs;
5531   Lisp_Object eol_type, raw_text_eol_type;
5532
5533   if (NILP (coding_system))
5534     return Qraw_text;
5535   spec = CODING_SYSTEM_SPEC (coding_system);
5536   attrs = AREF (spec, 0);
5537
5538   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5539     return coding_system;
5540
5541   eol_type = AREF (spec, 2);
5542   if (VECTORP (eol_type))
5543     return Qraw_text;
5544   spec = CODING_SYSTEM_SPEC (Qraw_text);
5545   raw_text_eol_type = AREF (spec, 2);
5546   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5547           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5548           : AREF (raw_text_eol_type, 2));
5549 }
5550
5551
5552 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5553    does, return one of the subsidiary that has the same eol-spec as
5554    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
5555    inherit end-of-line format from the system's setting
5556    (system_eol_type).  */
5557
5558 Lisp_Object
5559 coding_inherit_eol_type (coding_system, parent)
5560      Lisp_Object coding_system, parent;
5561 {
5562   Lisp_Object spec, eol_type;
5563
5564   if (NILP (coding_system))
5565     coding_system = Qraw_text;
5566   spec = CODING_SYSTEM_SPEC (coding_system);
5567   eol_type = AREF (spec, 2);
5568   if (VECTORP (eol_type))
5569     {
5570       Lisp_Object parent_eol_type;
5571
5572       if (! NILP (parent))
5573         {
5574           Lisp_Object parent_spec;
5575
5576           parent_spec = CODING_SYSTEM_SPEC (parent);
5577           parent_eol_type = AREF (parent_spec, 2);
5578         }
5579       else
5580         parent_eol_type = system_eol_type;
5581       if (EQ (parent_eol_type, Qunix))
5582         coding_system = AREF (eol_type, 0);
5583       else if (EQ (parent_eol_type, Qdos))
5584         coding_system = AREF (eol_type, 1);
5585       else if (EQ (parent_eol_type, Qmac))
5586         coding_system = AREF (eol_type, 2);
5587     }
5588   return coding_system;
5589 }
5590
/* Emacs has a mechanism to automatically detect a coding system if it
   is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But
   it's impossible to distinguish some coding systems accurately
   because they use the same range of codes.  So, coding systems are
   first classified into the following categories:
5596
5597    o coding-category-emacs-mule
5598
5599         The category for a coding system which has the same code range
5600         as Emacs' internal format.  Assigned the coding-system (Lisp
5601         symbol) `emacs-mule' by default.
5602
5603    o coding-category-sjis
5604
5605         The category for a coding system which has the same code range
5606         as SJIS.  Assigned the coding-system (Lisp
5607         symbol) `japanese-shift-jis' by default.
5608
5609    o coding-category-iso-7
5610
5611         The category for a coding system which has the same code range
5612         as ISO2022 of 7-bit environment.  This doesn't use any locking
5613         shift and single shift functions.  This can encode/decode all
5614         charsets.  Assigned the coding-system (Lisp symbol)
5615         `iso-2022-7bit' by default.
5616
5617    o coding-category-iso-7-tight
5618
5619         Same as coding-category-iso-7 except that this can
5620         encode/decode only the specified charsets.
5621
5622    o coding-category-iso-8-1
5623
5624         The category for a coding system which has the same code range
5625         as ISO2022 of 8-bit environment and graphic plane 1 used only
5626         for DIMENSION1 charset.  This doesn't use any locking shift
5627         and single shift functions.  Assigned the coding-system (Lisp
5628         symbol) `iso-latin-1' by default.
5629
5630    o coding-category-iso-8-2
5631
5632         The category for a coding system which has the same code range
5633         as ISO2022 of 8-bit environment and graphic plane 1 used only
5634         for DIMENSION2 charset.  This doesn't use any locking shift
5635         and single shift functions.  Assigned the coding-system (Lisp
5636         symbol) `japanese-iso-8bit' by default.
5637
5638    o coding-category-iso-7-else
5639
        The category for a coding system which has the same code range
        as ISO2022 of 7-bit environment but uses locking shift or
        single shift functions.  Assigned the coding-system (Lisp
        symbol) `iso-2022-7bit-lock' by default.
5644
5645    o coding-category-iso-8-else
5646
        The category for a coding system which has the same code range
        as ISO2022 of 8-bit environment but uses locking shift or
        single shift functions.  Assigned the coding-system (Lisp
        symbol) `iso-2022-8bit-ss2' by default.
5651
5652    o coding-category-big5
5653
5654         The category for a coding system which has the same code range
5655         as BIG5.  Assigned the coding-system (Lisp symbol)
5656         `cn-big5' by default.
5657
5658    o coding-category-utf-8
5659
5660         The category for a coding system which has the same code range
5661         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5662         symbol) `utf-8' by default.
5663
5664    o coding-category-utf-16-be
5665
        The category for a coding system in which a text has a
        Unicode signature (cf. Unicode Standard) in BIG endian order
        at the head.  Assigned the coding-system (Lisp symbol)
        `utf-16-be' by default.
5670
5671    o coding-category-utf-16-le
5672
        The category for a coding system in which a text has a
        Unicode signature (cf. Unicode Standard) in LITTLE endian
        order at the head.  Assigned the coding-system (Lisp
        symbol) `utf-16-le' by default.
5677
5678    o coding-category-ccl
5679
5680         The category for a coding system of which encoder/decoder is
5681         written in CCL programs.  The default value is nil, i.e., no
5682         coding system is assigned.
5683
5684    o coding-category-binary
5685
5686         The category for a coding system not categorized in any of the
5687         above.  Assigned the coding-system (Lisp symbol)
5688         `no-conversion' by default.
5689
   Each of them is a Lisp symbol whose value is an actual
   `coding-system' (also a Lisp symbol) assigned by a user.  What
   Emacs actually does is detect a category of coding system and then
   use the `coding-system' assigned to that category.  If Emacs can't
   narrow the choice down to a single category, it selects the
   category of the highest priority.  Priorities of categories are
   also specified by a user in the Lisp variable
   `coding-category-list'.
5697
5698 */
5699
5700 #define EOL_SEEN_NONE   0
5701 #define EOL_SEEN_LF     1
5702 #define EOL_SEEN_CR     2
5703 #define EOL_SEEN_CRLF   4
5704
5705 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5706    SOURCE is encoded.  If CATEGORY is one of
5707    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5708    two-byte, else they are encoded by one-byte.
5709
5710    Return one of EOL_SEEN_XXX.  */
5711
5712 #define MAX_EOL_CHECK_COUNT 3
5713
5714 static int
5715 detect_eol (source, src_bytes, category)
5716      const unsigned char *source;
5717      EMACS_INT src_bytes;
5718      enum coding_category category;
5719 {
5720   const unsigned char *src = source, *src_end = src + src_bytes;
5721   unsigned char c;
5722   int total  = 0;
5723   int eol_seen = EOL_SEEN_NONE;
5724
5725   if ((1 << category) & CATEGORY_MASK_UTF_16)
5726     {
5727       int msb, lsb;
5728
5729       msb = category == (coding_category_utf_16_le
5730                          | coding_category_utf_16_le_nosig);
5731       lsb = 1 - msb;
5732
5733       while (src + 1 < src_end)
5734         {
5735           c = src[lsb];
5736           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5737             {
5738               int this_eol;
5739
5740               if (c == '\n')
5741                 this_eol = EOL_SEEN_LF;
5742               else if (src + 3 >= src_end
5743                        || src[msb + 2] != 0
5744                        || src[lsb + 2] != '\n')
5745                 this_eol = EOL_SEEN_CR;
5746               else
5747                 this_eol = EOL_SEEN_CRLF;
5748
5749               if (eol_seen == EOL_SEEN_NONE)
5750                 /* This is the first end-of-line.  */
5751                 eol_seen = this_eol;
5752               else if (eol_seen != this_eol)
5753                 {
5754                   /* The found type is different from what found before.  */
5755                   eol_seen = EOL_SEEN_LF;
5756                   break;
5757                 }
5758               if (++total == MAX_EOL_CHECK_COUNT)
5759                 break;
5760             }
5761           src += 2;
5762         }
5763     }
5764   else
5765     {
5766       while (src < src_end)
5767         {
5768           c = *src++;
5769           if (c == '\n' || c == '\r')
5770             {
5771               int this_eol;
5772
5773               if (c == '\n')
5774                 this_eol = EOL_SEEN_LF;
5775               else if (src >= src_end || *src != '\n')
5776                 this_eol = EOL_SEEN_CR;
5777               else
5778                 this_eol = EOL_SEEN_CRLF, src++;
5779
5780               if (eol_seen == EOL_SEEN_NONE)
5781                 /* This is the first end-of-line.  */
5782                 eol_seen = this_eol;
5783               else if (eol_seen != this_eol)
5784                 {
5785                   /* The found type is different from what found before.  */
5786                   eol_seen = EOL_SEEN_LF;
5787                   break;
5788                 }
5789               if (++total == MAX_EOL_CHECK_COUNT)
5790                 break;
5791             }
5792         }
5793     }
5794   return eol_seen;
5795 }
5796
5797
5798 static Lisp_Object
5799 adjust_coding_eol_type (coding, eol_seen)
5800      struct coding_system *coding;
5801      int eol_seen;
5802 {
5803   Lisp_Object eol_type;
5804
5805   eol_type = CODING_ID_EOL_TYPE (coding->id);
5806   if (eol_seen & EOL_SEEN_LF)
5807     {
5808       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5809       eol_type = Qunix;
5810     }
5811   else if (eol_seen & EOL_SEEN_CRLF)
5812     {
5813       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5814       eol_type = Qdos;
5815     }
5816   else if (eol_seen & EOL_SEEN_CR)
5817     {
5818       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5819       eol_type = Qmac;
5820     }
5821   return eol_type;
5822 }
5823
5824 /* Detect how a text specified in CODING is encoded.  If a coding
5825    system is detected, update fields of CODING by the detected coding
5826    system.  */
5827
5828 void
5829 detect_coding (coding)
5830      struct coding_system *coding;
5831 {
5832   const unsigned char *src, *src_end;
5833
5834   coding->consumed = coding->consumed_char = 0;
5835   coding->produced = coding->produced_char = 0;
5836   coding_set_source (coding);
5837
5838   src_end = coding->source + coding->src_bytes;
5839   coding->head_ascii = 0;
5840
5841   /* If we have not yet decided the text encoding type, detect it
5842      now.  */
5843   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5844     {
5845       int c, i;
5846       struct coding_detection_info detect_info;
5847       int null_byte_found = 0, eight_bit_found = 0;
5848
5849       detect_info.checked = detect_info.found = detect_info.rejected = 0;
5850       for (src = coding->source; src < src_end; src++)
5851         {
5852           c = *src;
5853           if (c & 0x80)
5854             {
5855               eight_bit_found = 1;
5856               if (null_byte_found)
5857                 break;
5858             }
5859           else if (c < 0x20)
5860             {
5861               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5862                   && ! inhibit_iso_escape_detection
5863                   && ! detect_info.checked)
5864                 {
5865                   if (detect_coding_iso_2022 (coding, &detect_info))
5866                     {
5867                       /* We have scanned the whole data.  */
5868                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5869                         {
5870                           /* We didn't find an 8-bit code.  We may
5871                              have found a null-byte, but it's very
5872                              rare that a binary file confirm to
5873                              ISO-2022.  */
5874                           src = src_end;
5875                           coding->head_ascii = src - coding->source;
5876                         }
5877                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
5878                       break;
5879                     }
5880                 }
5881               else if (! c)
5882                 {
5883                   null_byte_found = 1;
5884                   if (eight_bit_found)
5885                     break;
5886                 }
5887               if (! eight_bit_found)
5888                 coding->head_ascii++;
5889             }
5890           else if (! eight_bit_found)
5891             coding->head_ascii++;
5892         }
5893
5894       if (null_byte_found || eight_bit_found
5895           || coding->head_ascii < coding->src_bytes
5896           || detect_info.found)
5897         {
5898           enum coding_category category;
5899           struct coding_system *this;
5900
5901           if (coding->head_ascii == coding->src_bytes)
5902             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
5903             for (i = 0; i < coding_category_raw_text; i++)
5904               {
5905                 category = coding_priorities[i];
5906                 this = coding_categories + category;
5907                 if (detect_info.found & (1 << category))
5908                   break;
5909               }
5910           else
5911             {
5912               if (null_byte_found)
5913                 {
5914                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
5915                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
5916                 }
5917               for (i = 0; i < coding_category_raw_text; i++)
5918                 {
5919                   category = coding_priorities[i];
5920                   this = coding_categories + category;
5921                   if (this->id < 0)
5922                     {
5923                       /* No coding system of this category is defined.  */
5924                       detect_info.rejected |= (1 << category);
5925                     }
5926                   else if (category >= coding_category_raw_text)
5927                     continue;
5928                   else if (detect_info.checked & (1 << category))
5929                     {
5930                       if (detect_info.found & (1 << category))
5931                         break;
5932                     }
5933                   else if ((*(this->detector)) (coding, &detect_info)
5934                            && detect_info.found & (1 << category))
5935                     {
5936                       if (category == coding_category_utf_16_auto)
5937                         {
5938                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5939                             category = coding_category_utf_16_le;
5940                           else
5941                             category = coding_category_utf_16_be;
5942                         }
5943                       break;
5944                     }
5945                 }
5946             }
5947
5948           if (i < coding_category_raw_text)
5949             setup_coding_system (CODING_ID_NAME (this->id), coding);
5950           else if (null_byte_found)
5951             setup_coding_system (Qno_conversion, coding);
5952           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
5953                    == CATEGORY_MASK_ANY)
5954             setup_coding_system (Qraw_text, coding);
5955           else if (detect_info.rejected)
5956             for (i = 0; i < coding_category_raw_text; i++)
5957               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5958                 {
5959                   this = coding_categories + coding_priorities[i];
5960                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5961                   break;
5962                 }
5963         }
5964     }
5965   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5966            == coding_category_utf_8_auto)
5967     {
5968       Lisp_Object coding_systems;
5969       struct coding_detection_info detect_info;
5970
5971       coding_systems
5972         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
5973       detect_info.found = detect_info.rejected = 0;
5974       coding->head_ascii = 0;
5975       if (CONSP (coding_systems)
5976           && detect_coding_utf_8 (coding, &detect_info))
5977         {
5978           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
5979             setup_coding_system (XCAR (coding_systems), coding);
5980           else
5981             setup_coding_system (XCDR (coding_systems), coding);
5982         }
5983     }
5984   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5985            == coding_category_utf_16_auto)
5986     {
5987       Lisp_Object coding_systems;
5988       struct coding_detection_info detect_info;
5989
5990       coding_systems
5991         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
5992       detect_info.found = detect_info.rejected = 0;
5993       coding->head_ascii = 0;
5994       if (CONSP (coding_systems)
5995           && detect_coding_utf_16 (coding, &detect_info))
5996         {
5997           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5998             setup_coding_system (XCAR (coding_systems), coding);
5999           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6000             setup_coding_system (XCDR (coding_systems), coding);
6001         }
6002     }
6003 }
6004
6005
6006 static void
6007 decode_eol (coding)
6008      struct coding_system *coding;
6009 {
6010   Lisp_Object eol_type;
6011   unsigned char *p, *pbeg, *pend;
6012
6013   eol_type = CODING_ID_EOL_TYPE (coding->id);
6014   if (EQ (eol_type, Qunix))
6015     return;
6016
6017   if (NILP (coding->dst_object))
6018     pbeg = coding->destination;
6019   else
6020     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6021   pend = pbeg + coding->produced;
6022
6023   if (VECTORP (eol_type))
6024     {
6025       int eol_seen = EOL_SEEN_NONE;
6026
6027       for (p = pbeg; p < pend; p++)
6028         {
6029           if (*p == '\n')
6030             eol_seen |= EOL_SEEN_LF;
6031           else if (*p == '\r')
6032             {
6033               if (p + 1 < pend && *(p + 1) == '\n')
6034                 {
6035                   eol_seen |= EOL_SEEN_CRLF;
6036                   p++;
6037                 }
6038               else
6039                 eol_seen |= EOL_SEEN_CR;
6040             }
6041         }
6042       if (eol_seen != EOL_SEEN_NONE
6043           && eol_seen != EOL_SEEN_LF
6044           && eol_seen != EOL_SEEN_CRLF
6045           && eol_seen != EOL_SEEN_CR)
6046         eol_seen = EOL_SEEN_LF;
6047       if (eol_seen != EOL_SEEN_NONE)
6048         eol_type = adjust_coding_eol_type (coding, eol_seen);
6049     }
6050
6051   if (EQ (eol_type, Qmac))
6052     {
6053       for (p = pbeg; p < pend; p++)
6054         if (*p == '\r')
6055           *p = '\n';
6056     }
6057   else if (EQ (eol_type, Qdos))
6058     {
6059       int n = 0;
6060
6061       if (NILP (coding->dst_object))
6062         {
6063           /* Start deleting '\r' from the tail to minimize the memory
6064              movement.  */
6065           for (p = pend - 2; p >= pbeg; p--)
6066             if (*p == '\r')
6067               {
6068                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6069                 n++;
6070               }
6071         }
6072       else
6073         {
6074           int pos_byte = coding->dst_pos_byte;
6075           int pos = coding->dst_pos;
6076           int pos_end = pos + coding->produced_char - 1;
6077
6078           while (pos < pos_end)
6079             {
6080               p = BYTE_POS_ADDR (pos_byte);
6081               if (*p == '\r' && p[1] == '\n')
6082                 {
6083                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6084                   n++;
6085                   pos_end--;
6086                 }
6087               pos++;
6088               if (coding->dst_multibyte)
6089                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6090               else
6091                 pos_byte++;
6092             }
6093         }
6094       coding->produced -= n;
6095       coding->produced_char -= n;
6096     }
6097 }
6098
6099
6100 /* Return a translation table (or list of them) from coding system
6101    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6102    decoding (ENCODEP is zero). */
6103
6104 static Lisp_Object
6105 get_translation_table (attrs, encodep, max_lookup)
6106      Lisp_Object attrs;
6107      int encodep, *max_lookup;
6108 {
6109   Lisp_Object standard, translation_table;
6110   Lisp_Object val;
6111
6112   if (encodep)
6113     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6114       standard = Vstandard_translation_table_for_encode;
6115   else
6116     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6117       standard = Vstandard_translation_table_for_decode;
6118   if (NILP (translation_table))
6119     translation_table = standard;
6120   else
6121     {
6122       if (SYMBOLP (translation_table))
6123         translation_table = Fget (translation_table, Qtranslation_table);
6124       else if (CONSP (translation_table))
6125         {
6126           translation_table = Fcopy_sequence (translation_table);
6127           for (val = translation_table; CONSP (val); val = XCDR (val))
6128             if (SYMBOLP (XCAR (val)))
6129               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6130         }
6131       if (CHAR_TABLE_P (standard))
6132         {
6133           if (CONSP (translation_table))
6134             translation_table = nconc2 (translation_table,
6135                                         Fcons (standard, Qnil));
6136           else
6137             translation_table = Fcons (translation_table,
6138                                        Fcons (standard, Qnil));
6139         }
6140     }
6141
6142   if (max_lookup)
6143     {
6144       *max_lookup = 1;
6145       if (CHAR_TABLE_P (translation_table)
6146           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6147         {
6148           val = XCHAR_TABLE (translation_table)->extras[1];
6149           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6150             *max_lookup = XFASTINT (val);
6151         }
6152       else if (CONSP (translation_table))
6153         {
6154           Lisp_Object tail, val;
6155
6156           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6157             if (CHAR_TABLE_P (XCAR (tail))
6158                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6159               {
6160                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6161                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6162                   *max_lookup = XFASTINT (val);
6163               }
6164         }
6165     }
6166   return translation_table;
6167 }
6168
/* Look up character C in TABLE, which is a char-table or a list
   whose char-table elements are consulted in order.

   Whenever a lookup yields a character, C is replaced by that
   character (and, for a list, the lookup goes on with the next
   table).  On exit TRANS is nil if no other kind of entry was found;
   otherwise it is the first non-nil, non-character entry found.

   C and TRANS must be lvalues.  TABLE is evaluated exactly once, and
   the names of the locals are chosen so as not to capture identifiers
   appearing in the arguments.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    Lisp_Object lookup_table_ = (table);                                \
    Lisp_Object lookup_tail_;                                           \
                                                                        \
    (trans) = Qnil;                                                     \
    if (CHAR_TABLE_P (lookup_table_))                                   \
      {                                                                 \
        (trans) = CHAR_TABLE_REF (lookup_table_, (c));                  \
        if (CHARACTERP (trans))                                         \
          (c) = XFASTINT (trans), (trans) = Qnil;                       \
      }                                                                 \
    else if (CONSP (lookup_table_))                                     \
      {                                                                 \
        for (lookup_tail_ = lookup_table_; CONSP (lookup_tail_);        \
             lookup_tail_ = XCDR (lookup_tail_))                        \
          if (CHAR_TABLE_P (XCAR (lookup_tail_)))                       \
            {                                                           \
              (trans) = CHAR_TABLE_REF (XCAR (lookup_tail_), (c));      \
              if (CHARACTERP (trans))                                   \
                (c) = XFASTINT (trans), (trans) = Qnil;                 \
              else if (! NILP (trans))                                  \
                break;                                                  \
            }                                                           \
      }                                                                 \
  } while (0)
6193
6194
6195 static Lisp_Object
6196 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
6197      Lisp_Object val;
6198      int *buf, *buf_end;
6199      int last_block;
6200      int *from_nchars, *to_nchars;
6201 {
6202   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
6203      [TO-CHAR ...].  */
6204   if (CONSP (val))
6205     {
6206       Lisp_Object from, tail;
6207       int i, len;
6208
6209       for (tail = val; CONSP (tail); tail = XCDR (tail))
6210         {
6211           val = XCAR (tail);
6212           from = XCAR (val);
6213           len = ASIZE (from);
6214           for (i = 0; i < len; i++)
6215             {
6216               if (buf + i == buf_end)
6217                 {
6218                   if (! last_block)
6219                     return Qt;
6220                   break;
6221                 }
6222               if (XINT (AREF (from, i)) != buf[i])
6223                 break;
6224             }
6225           if (i == len)
6226             {
6227               val = XCDR (val);
6228               *from_nchars = len;
6229               break;
6230             }
6231         }
6232       if (! CONSP (tail))
6233         return Qnil;
6234     }
6235   if (VECTORP (val))
6236     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6237   else
6238     *buf = XINT (val);
6239   return val;
6240 }
6241
6242
6243 static int
6244 produce_chars (coding, translation_table, last_block)
6245      struct coding_system *coding;
6246      Lisp_Object translation_table;
6247      int last_block;
6248 {
6249   unsigned char *dst = coding->destination + coding->produced;
6250   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6251   EMACS_INT produced;
6252   EMACS_INT produced_chars = 0;
6253   int carryover = 0;
6254
6255   if (! coding->chars_at_source)
6256     {
6257       /* Source characters are in coding->charbuf.  */
6258       int *buf = coding->charbuf;
6259       int *buf_end = buf + coding->charbuf_used;
6260
6261       if (EQ (coding->src_object, coding->dst_object))
6262         {
6263           coding_set_source (coding);
6264           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6265         }
6266
6267       while (buf < buf_end)
6268         {
6269           int c = *buf, i;
6270
6271           if (c >= 0)
6272             {
6273               int from_nchars = 1, to_nchars = 1;
6274               Lisp_Object trans = Qnil;
6275
6276               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6277               if (! NILP (trans))
6278                 {
6279                   trans = get_translation (trans, buf, buf_end, last_block,
6280                                            &from_nchars, &to_nchars);
6281                   if (EQ (trans, Qt))
6282                     break;
6283                   c = *buf;
6284                 }
6285
6286               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6287                 {
6288                   dst = alloc_destination (coding,
6289                                            buf_end - buf
6290                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6291                                            dst);
6292                   if (EQ (coding->src_object, coding->dst_object))
6293                     {
6294                       coding_set_source (coding);
6295                       dst_end = ((unsigned char *) coding->source) + coding->consumed;
6296                     }
6297                   else
6298                     dst_end = coding->destination + coding->dst_bytes;
6299                 }
6300
6301               for (i = 0; i < to_nchars; i++)
6302                 {
6303                   if (i > 0)
6304                     c = XINT (AREF (trans, i));
6305                   if (coding->dst_multibyte
6306                       || ! CHAR_BYTE8_P (c))
6307                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6308                   else
6309                     *dst++ = CHAR_TO_BYTE8 (c);
6310                 }
6311               produced_chars += to_nchars;
6312               *buf++ = to_nchars;
6313               while (--from_nchars > 0)
6314                 *buf++ = 0;
6315             }
6316           else
6317             /* This is an annotation datum.  (-C) is the length.  */
6318             buf += -c;
6319         }
6320       carryover = buf_end - buf;
6321     }
6322   else
6323     {
6324       /* Source characters are at coding->source.  */
6325       const unsigned char *src = coding->source;
6326       const unsigned char *src_end = src + coding->consumed;
6327
6328       if (EQ (coding->dst_object, coding->src_object))
6329         dst_end = (unsigned char *) src;
6330       if (coding->src_multibyte != coding->dst_multibyte)
6331         {
6332           if (coding->src_multibyte)
6333             {
6334               int multibytep = 1;
6335               EMACS_INT consumed_chars = 0;
6336
6337               while (1)
6338                 {
6339                   const unsigned char *src_base = src;
6340                   int c;
6341
6342                   ONE_MORE_BYTE (c);
6343                   if (dst == dst_end)
6344                     {
6345                       if (EQ (coding->src_object, coding->dst_object))
6346                         dst_end = (unsigned char *) src;
6347                       if (dst == dst_end)
6348                         {
6349                           EMACS_INT offset = src - coding->source;
6350
6351                           dst = alloc_destination (coding, src_end - src + 1,
6352                                                    dst);
6353                           dst_end = coding->destination + coding->dst_bytes;
6354                           coding_set_source (coding);
6355                           src = coding->source + offset;
6356                           src_end = coding->source + coding->src_bytes;
6357                           if (EQ (coding->src_object, coding->dst_object))
6358                             dst_end = (unsigned char *) src;
6359                         }
6360                     }
6361                   *dst++ = c;
6362                   produced_chars++;
6363                 }
6364             no_more_source:
6365               ;
6366             }
6367           else
6368             while (src < src_end)
6369               {
6370                 int multibytep = 1;
6371                 int c = *src++;
6372
6373                 if (dst >= dst_end - 1)
6374                   {
6375                     if (EQ (coding->src_object, coding->dst_object))
6376                       dst_end = (unsigned char *) src;
6377                     if (dst >= dst_end - 1)
6378                       {
6379                         EMACS_INT offset = src - coding->source;
6380                         EMACS_INT more_bytes;
6381
6382                         if (EQ (coding->src_object, coding->dst_object))
6383                           more_bytes = ((src_end - src) / 2) + 2;
6384                         else
6385                           more_bytes = src_end - src + 2;
6386                         dst = alloc_destination (coding, more_bytes, dst);
6387                         dst_end = coding->destination + coding->dst_bytes;
6388                         coding_set_source (coding);
6389                         src = coding->source + offset;
6390                         src_end = coding->source + coding->src_bytes;
6391                         if (EQ (coding->src_object, coding->dst_object))
6392                           dst_end = (unsigned char *) src;
6393                       }
6394                   }
6395                 EMIT_ONE_BYTE (c);
6396               }
6397         }
6398       else
6399         {
6400           if (!EQ (coding->src_object, coding->dst_object))
6401             {
6402               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6403
6404               if (require > 0)
6405                 {
6406                   EMACS_INT offset = src - coding->source;
6407
6408                   dst = alloc_destination (coding, require, dst);
6409                   coding_set_source (coding);
6410                   src = coding->source + offset;
6411                   src_end = coding->source + coding->src_bytes;
6412                 }
6413             }
6414           produced_chars = coding->consumed_char;
6415           while (src < src_end)
6416             *dst++ = *src++;
6417         }
6418     }
6419
6420   produced = dst - (coding->destination + coding->produced);
6421   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6422     insert_from_gap (produced_chars, produced);
6423   coding->produced += produced;
6424   coding->produced_char += produced_chars;
6425   return carryover;
6426 }
6427
6428 /* Compose text in CODING->object according to the annotation data at
6429    CHARBUF.  CHARBUF is an array:
6430      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
6431  */
6432
6433 static INLINE void
6434 produce_composition (coding, charbuf, pos)
6435      struct coding_system *coding;
6436      int *charbuf;
6437      EMACS_INT pos;
6438 {
6439   int len;
6440   EMACS_INT to;
6441   enum composition_method method;
6442   Lisp_Object components;
6443
6444   len = -charbuf[0];
6445   to = pos + charbuf[2];
6446   if (to <= pos)
6447     return;
6448   method = (enum composition_method) (charbuf[3]);
6449
6450   if (method == COMPOSITION_RELATIVE)
6451     components = Qnil;
6452   else if (method >= COMPOSITION_WITH_RULE
6453            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6454     {
6455       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6456       int i;
6457
6458       len -= 4;
6459       charbuf += 4;
6460       for (i = 0; i < len; i++)
6461         {
6462           args[i] = make_number (charbuf[i]);
6463           if (charbuf[i] < 0)
6464             return;
6465         }
6466       components = (method == COMPOSITION_WITH_ALTCHARS
6467                     ? Fstring (len, args) : Fvector (len, args));
6468     }
6469   else
6470     return;
6471   compose_text (pos, to, components, Qnil, coding->dst_object);
6472 }
6473
6474
6475 /* Put `charset' property on text in CODING->object according to
6476    the annotation data at CHARBUF.  CHARBUF is an array:
6477      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6478  */
6479
6480 static INLINE void
6481 produce_charset (coding, charbuf, pos)
6482      struct coding_system *coding;
6483      int *charbuf;
6484      EMACS_INT pos;
6485 {
6486   EMACS_INT from = pos - charbuf[2];
6487   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6488
6489   Fput_text_property (make_number (from), make_number (pos),
6490                       Qcharset, CHARSET_NAME (charset),
6491                       coding->dst_object);
6492 }
6493
6494
/* Number of elements initially requested for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the character work buffer of CODING with alloca, i.e. in
   the stack frame of the function that uses this macro, and store
   its address in CODING->charbuf and its element count in
   CODING->charbuf_size.  Start with CHARBUF_SIZE elements and halve
   the request while alloca yields a null pointer and the size is
   still above 1024.  If no buffer could be obtained, record
   CODING_RESULT_INSUFFICIENT_MEM and make the enclosing function
   return CODING->result.

   CODING is evaluated several times, so it must have no side
   effects.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        record_conversion_result ((coding),                             \
                                  CODING_RESULT_INSUFFICIENT_MEM);      \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
6516
6517
6518 static void
6519 produce_annotation (coding, pos)
6520      struct coding_system *coding;
6521      EMACS_INT pos;
6522 {
6523   int *charbuf = coding->charbuf;
6524   int *charbuf_end = charbuf + coding->charbuf_used;
6525
6526   if (NILP (coding->dst_object))
6527     return;
6528
6529   while (charbuf < charbuf_end)
6530     {
6531       if (*charbuf >= 0)
6532         pos += *charbuf++;
6533       else
6534         {
6535           int len = -*charbuf;
6536           switch (charbuf[1])
6537             {
6538             case CODING_ANNOTATE_COMPOSITION_MASK:
6539               produce_composition (coding, charbuf, pos);
6540               break;
6541             case CODING_ANNOTATE_CHARSET_MASK:
6542               produce_charset (coding, charbuf, pos);
6543               break;
6544             default:
6545               abort ();
6546             }
6547           charbuf += len;
6548         }
6549     }
6550 }
6551
6552 /* Decode the data at CODING->src_object into CODING->dst_object.
6553    CODING->src_object is a buffer, a string, or nil.
6554    CODING->dst_object is a buffer.
6555
6556    If CODING->src_object is a buffer, it must be the current buffer.
6557    In this case, if CODING->src_pos is positive, it is a position of
6558    the source text in the buffer, otherwise, the source text is in the
6559    gap area of the buffer, and CODING->src_pos specifies the offset of
6560    the text from GPT (which must be the same as PT).  If this is the
6561    same buffer as CODING->dst_object, CODING->src_pos must be
6562    negative.
6563
6564    If CODING->src_object is a string, CODING->src_pos is an index to
6565    that string.
6566
6567    If CODING->src_object is nil, CODING->source must already point to
6568    the non-relocatable memory area.  In this case, CODING->src_pos is
6569    an offset from CODING->source.
6570
6571    The decoded data is inserted at the current point of the buffer
6572    CODING->dst_object.
6573 */
6574
6575 static int
6576 decode_coding (coding)
6577      struct coding_system *coding;
6578 {
6579   Lisp_Object attrs;
6580   Lisp_Object undo_list;
6581   Lisp_Object translation_table;
6582   int carryover;
6583   int i;
6584
6585   if (BUFFERP (coding->src_object)
6586       && coding->src_pos > 0
6587       && coding->src_pos < GPT
6588       && coding->src_pos + coding->src_chars > GPT)
6589     move_gap_both (coding->src_pos, coding->src_pos_byte);
6590
6591   undo_list = Qt;
6592   if (BUFFERP (coding->dst_object))
6593     {
6594       if (current_buffer != XBUFFER (coding->dst_object))
6595         set_buffer_internal (XBUFFER (coding->dst_object));
6596       if (GPT != PT)
6597         move_gap_both (PT, PT_BYTE);
6598       undo_list = current_buffer->undo_list;
6599       current_buffer->undo_list = Qt;
6600     }
6601
6602   coding->consumed = coding->consumed_char = 0;
6603   coding->produced = coding->produced_char = 0;
6604   coding->chars_at_source = 0;
6605   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6606   coding->errors = 0;
6607
6608   ALLOC_CONVERSION_WORK_AREA (coding);
6609
6610   attrs = CODING_ID_ATTRS (coding->id);
6611   translation_table = get_translation_table (attrs, 0, NULL);
6612
6613   carryover = 0;
6614   do
6615     {
6616       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6617
6618       coding_set_source (coding);
6619       coding->annotated = 0;
6620       coding->charbuf_used = carryover;
6621       (*(coding->decoder)) (coding);
6622       coding_set_destination (coding);
6623       carryover = produce_chars (coding, translation_table, 0);
6624       if (coding->annotated)
6625         produce_annotation (coding, pos);
6626       for (i = 0; i < carryover; i++)
6627         coding->charbuf[i]
6628           = coding->charbuf[coding->charbuf_used - carryover + i];
6629     }
6630   while (coding->consumed < coding->src_bytes
6631          && (coding->result == CODING_RESULT_SUCCESS
6632              || coding->result == CODING_RESULT_INVALID_SRC));
6633
6634   if (carryover > 0)
6635     {
6636       coding_set_destination (coding);
6637       coding->charbuf_used = carryover;
6638       produce_chars (coding, translation_table, 1);
6639     }
6640
6641   coding->carryover_bytes = 0;
6642   if (coding->consumed < coding->src_bytes)
6643     {
6644       int nbytes = coding->src_bytes - coding->consumed;
6645       const unsigned char *src;
6646
6647       coding_set_source (coding);
6648       coding_set_destination (coding);
6649       src = coding->source + coding->consumed;
6650
6651       if (coding->mode & CODING_MODE_LAST_BLOCK)
6652         {
6653           /* Flush out unprocessed data as binary chars.  We are sure
6654              that the number of data is less than the size of
6655              coding->charbuf.  */
6656           coding->charbuf_used = 0;
6657           while (nbytes-- > 0)
6658             {
6659               int c = *src++;
6660
6661               if (c & 0x80)
6662                 c = BYTE8_TO_CHAR (c);
6663               coding->charbuf[coding->charbuf_used++] = c;
6664             }
6665           produce_chars (coding, Qnil, 1);
6666         }
6667       else
6668         {
6669           /* Record unprocessed bytes in coding->carryover.  We are
6670              sure that the number of data is less than the size of
6671              coding->carryover.  */
6672           unsigned char *p = coding->carryover;
6673
6674           coding->carryover_bytes = nbytes;
6675           while (nbytes-- > 0)
6676             *p++ = *src++;
6677         }
6678       coding->consumed = coding->src_bytes;
6679     }
6680
6681   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6682     decode_eol (coding);
6683   if (BUFFERP (coding->dst_object))
6684     {
6685       current_buffer->undo_list = undo_list;
6686       record_insert (coding->dst_pos, coding->produced_char);
6687     }
6688   return coding->result;
6689 }
6690
6691
6692 /* Extract an annotation datum from a composition starting at POS and
6693    ending before LIMIT of CODING->src_object (buffer or string), store
6694    the data in BUF, set *STOP to a starting position of the next
6695    composition (if any) or to LIMIT, and return the address of the
6696    next element of BUF.
6697
6698    If such an annotation is not found, set *STOP to a starting
6699    position of a composition after POS (if any) or to LIMIT, and
6700    return BUF.  */
6701
6702 static INLINE int *
6703 handle_composition_annotation (pos, limit, coding, buf, stop)
6704      EMACS_INT pos, limit;
6705      struct coding_system *coding;
6706      int *buf;
6707      EMACS_INT *stop;
6708 {
6709   EMACS_INT start, end;
6710   Lisp_Object prop;
6711
6712   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6713       || end > limit)
6714     *stop = limit;
6715   else if (start > pos)
6716     *stop = start;
6717   else
6718     {
6719       if (start == pos)
6720         {
6721           /* We found a composition.  Store the corresponding
6722              annotation data in BUF.  */
6723           int *head = buf;
6724           enum composition_method method = COMPOSITION_METHOD (prop);
6725           int nchars = COMPOSITION_LENGTH (prop);
6726
6727           ADD_COMPOSITION_DATA (buf, nchars, method);
6728           if (method != COMPOSITION_RELATIVE)
6729             {
6730               Lisp_Object components;
6731               int len, i, i_byte;
6732
6733               components = COMPOSITION_COMPONENTS (prop);
6734               if (VECTORP (components))
6735                 {
6736                   len = XVECTOR (components)->size;
6737                   for (i = 0; i < len; i++)
6738                     *buf++ = XINT (AREF (components, i));
6739                 }
6740               else if (STRINGP (components))
6741                 {
6742                   len = SCHARS (components);
6743                   i = i_byte = 0;
6744                   while (i < len)
6745                     {
6746                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6747                       buf++;
6748                     }
6749                 }
6750               else if (INTEGERP (components))
6751                 {
6752                   len = 1;
6753                   *buf++ = XINT (components);
6754                 }
6755               else if (CONSP (components))
6756                 {
6757                   for (len = 0; CONSP (components);
6758                        len++, components = XCDR (components))
6759                     *buf++ = XINT (XCAR (components));
6760                 }
6761               else
6762                 abort ();
6763               *head -= len;
6764             }
6765         }
6766
6767       if (find_composition (end, limit, &start, &end, &prop,
6768                             coding->src_object)
6769           && end <= limit)
6770         *stop = start;
6771       else
6772         *stop = limit;
6773     }
6774   return buf;
6775 }
6776
6777
6778 /* Extract an annotation datum from a text property `charset' at POS of
6779    CODING->src_object (buffer of string), store the data in BUF, set
6780    *STOP to the position where the value of `charset' property changes
6781    (limiting by LIMIT), and return the address of the next element of
6782    BUF.
6783
6784    If the property value is nil, set *STOP to the position where the
6785    property value is non-nil (limiting by LIMIT), and return BUF.  */
6786
6787 static INLINE int *
6788 handle_charset_annotation (pos, limit, coding, buf, stop)
6789      EMACS_INT pos, limit;
6790      struct coding_system *coding;
6791      int *buf;
6792      EMACS_INT *stop;
6793 {
6794   Lisp_Object val, next;
6795   int id;
6796
6797   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6798   if (! NILP (val) && CHARSETP (val))
6799     id = XINT (CHARSET_SYMBOL_ID (val));
6800   else
6801     id = -1;
6802   ADD_CHARSET_DATA (buf, 0, id);
6803   next = Fnext_single_property_change (make_number (pos), Qcharset,
6804                                        coding->src_object,
6805                                        make_number (limit));
6806   *stop = XINT (next);
6807   return buf;
6808 }
6809
6810
6811 static void
6812 consume_chars (coding, translation_table, max_lookup)
6813      struct coding_system *coding;
6814      Lisp_Object translation_table;
6815      int max_lookup;
6816 {
6817   int *buf = coding->charbuf;
6818   int *buf_end = coding->charbuf + coding->charbuf_size;
6819   const unsigned char *src = coding->source + coding->consumed;
6820   const unsigned char *src_end = coding->source + coding->src_bytes;
6821   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6822   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6823   int multibytep = coding->src_multibyte;
6824   Lisp_Object eol_type;
6825   int c;
6826   EMACS_INT stop, stop_composition, stop_charset;
6827   int *lookup_buf = NULL;
6828
6829   if (! NILP (translation_table))
6830     lookup_buf = alloca (sizeof (int) * max_lookup);
6831
6832   eol_type = CODING_ID_EOL_TYPE (coding->id);
6833   if (VECTORP (eol_type))
6834     eol_type = Qunix;
6835
6836   /* Note: composition handling is not yet implemented.  */
6837   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6838
6839   if (NILP (coding->src_object))
6840     stop = stop_composition = stop_charset = end_pos;
6841   else
6842     {
6843       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6844         stop = stop_composition = pos;
6845       else
6846         stop = stop_composition = end_pos;
6847       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6848         stop = stop_charset = pos;
6849       else
6850         stop_charset = end_pos;
6851     }
6852
6853   /* Compensate for CRLF and conversion.  */
6854   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6855   while (buf < buf_end)
6856     {
6857       Lisp_Object trans;
6858
6859       if (pos == stop)
6860         {
6861           if (pos == end_pos)
6862             break;
6863           if (pos == stop_composition)
6864             buf = handle_composition_annotation (pos, end_pos, coding,
6865                                                  buf, &stop_composition);
6866           if (pos == stop_charset)
6867             buf = handle_charset_annotation (pos, end_pos, coding,
6868                                              buf, &stop_charset);
6869           stop = (stop_composition < stop_charset
6870                   ? stop_composition : stop_charset);
6871         }
6872
6873       if (! multibytep)
6874         {
6875           EMACS_INT bytes;
6876
6877           if (coding->encoder == encode_coding_raw_text)
6878             c = *src++, pos++;
6879           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6880             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
6881           else
6882             c = BYTE8_TO_CHAR (*src), src++, pos++;
6883         }
6884       else
6885         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
6886       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6887         c = '\n';
6888       if (! EQ (eol_type, Qunix))
6889         {
6890           if (c == '\n')
6891             {
6892               if (EQ (eol_type, Qdos))
6893                 *buf++ = '\r';
6894               else
6895                 c = '\r';
6896             }
6897         }
6898
6899       trans = Qnil;
6900       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6901       if (NILP (trans))
6902         *buf++ = c;
6903       else
6904         {
6905           int from_nchars = 1, to_nchars = 1;
6906           int *lookup_buf_end;
6907           const unsigned char *p = src;
6908           int i;
6909
6910           lookup_buf[0] = c;
6911           for (i = 1; i < max_lookup && p < src_end; i++)
6912             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6913           lookup_buf_end = lookup_buf + i;
6914           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6915                                    &from_nchars, &to_nchars);
6916           if (EQ (trans, Qt)
6917               || buf + to_nchars > buf_end)
6918             break;
6919           *buf++ = *lookup_buf;
6920           for (i = 1; i < to_nchars; i++)
6921             *buf++ = XINT (AREF (trans, i));
6922           for (i = 1; i < from_nchars; i++, pos++)
6923             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6924         }
6925     }
6926
6927   coding->consumed = src - coding->source;
6928   coding->consumed_char = pos - coding->src_pos;
6929   coding->charbuf_used = buf - coding->charbuf;
6930   coding->chars_at_source = 0;
6931 }
6932
6933
6934 /* Encode the text at CODING->src_object into CODING->dst_object.
6935    CODING->src_object is a buffer or a string.
6936    CODING->dst_object is a buffer or nil.
6937
6938    If CODING->src_object is a buffer, it must be the current buffer.
6939    In this case, if CODING->src_pos is positive, it is a position of
6940    the source text in the buffer, otherwise. the source text is in the
6941    gap area of the buffer, and coding->src_pos specifies the offset of
6942    the text from GPT (which must be the same as PT).  If this is the
6943    same buffer as CODING->dst_object, CODING->src_pos must be
6944    negative and CODING should not have `pre-write-conversion'.
6945
6946    If CODING->src_object is a string, CODING should not have
6947    `pre-write-conversion'.
6948
6949    If CODING->dst_object is a buffer, the encoded data is inserted at
6950    the current point of that buffer.
6951
6952    If CODING->dst_object is nil, the encoded data is placed at the
6953    memory area specified by CODING->destination.  */
6954
6955 static int
6956 encode_coding (coding)
6957      struct coding_system *coding;
6958 {
6959   Lisp_Object attrs;
6960   Lisp_Object translation_table;
6961   int max_lookup;
6962
6963   attrs = CODING_ID_ATTRS (coding->id);
6964   if (coding->encoder == encode_coding_raw_text)
6965     translation_table = Qnil, max_lookup = 0;
6966   else
6967     translation_table = get_translation_table (attrs, 1, &max_lookup);
6968
6969   if (BUFFERP (coding->dst_object))
6970     {
6971       set_buffer_internal (XBUFFER (coding->dst_object));
6972       coding->dst_multibyte
6973         = ! NILP (current_buffer->enable_multibyte_characters);
6974     }
6975
6976   coding->consumed = coding->consumed_char = 0;
6977   coding->produced = coding->produced_char = 0;
6978   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6979   coding->errors = 0;
6980
6981   ALLOC_CONVERSION_WORK_AREA (coding);
6982
6983   do {
6984     coding_set_source (coding);
6985     consume_chars (coding, translation_table, max_lookup);
6986     coding_set_destination (coding);
6987     (*(coding->encoder)) (coding);
6988   } while (coding->consumed_char < coding->src_chars);
6989
6990   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
6991     insert_from_gap (coding->produced_char, coding->produced);
6992
6993   return (coding->result);
6994 }
6995
6996
6997 /* Name (or base name) of work buffer for code conversion.  */
6998 static Lisp_Object Vcode_conversion_workbuf_name;
6999
7000 /* A working buffer used by the top level conversion.  Once it is
7001    created, it is never destroyed.  It has the name
7002    Vcode_conversion_workbuf_name.  The other working buffers are
7003    destroyed after the use is finished, and their names are modified
7004    versions of Vcode_conversion_workbuf_name.  */
7005 static Lisp_Object Vcode_conversion_reused_workbuf;
7006
7007 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
7008 static int reused_workbuf_in_use;
7009
7010
7011 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7012    multibyteness of returning buffer.  */
7013
7014 static Lisp_Object
7015 make_conversion_work_buffer (multibyte)
7016      int multibyte;
7017 {
7018   Lisp_Object name, workbuf;
7019   struct buffer *current;
7020
7021   if (reused_workbuf_in_use++)
7022     {
7023       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7024       workbuf = Fget_buffer_create (name);
7025     }
7026   else
7027     {
7028       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7029         Vcode_conversion_reused_workbuf
7030           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7031       workbuf = Vcode_conversion_reused_workbuf;
7032     }
7033   current = current_buffer;
7034   set_buffer_internal (XBUFFER (workbuf));
7035   /* We can't allow modification hooks to run in the work buffer.  For
7036      instance, directory_files_internal assumes that file decoding
7037      doesn't compile new regexps.  */
7038   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7039   Ferase_buffer ();
7040   current_buffer->undo_list = Qt;
7041   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7042   set_buffer_internal (current);
7043   return workbuf;
7044 }
7045
7046
7047 static Lisp_Object
7048 code_conversion_restore (arg)
7049      Lisp_Object arg;
7050 {
7051   Lisp_Object current, workbuf;
7052   struct gcpro gcpro1;
7053
7054   GCPRO1 (arg);
7055   current = XCAR (arg);
7056   workbuf = XCDR (arg);
7057   if (! NILP (workbuf))
7058     {
7059       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7060         reused_workbuf_in_use = 0;
7061       else if (! NILP (Fbuffer_live_p (workbuf)))
7062         Fkill_buffer (workbuf);
7063     }
7064   set_buffer_internal (XBUFFER (current));
7065   UNGCPRO;
7066   return Qnil;
7067 }
7068
7069 Lisp_Object
7070 code_conversion_save (with_work_buf, multibyte)
7071      int with_work_buf, multibyte;
7072 {
7073   Lisp_Object workbuf = Qnil;
7074
7075   if (with_work_buf)
7076     workbuf = make_conversion_work_buffer (multibyte);
7077   record_unwind_protect (code_conversion_restore,
7078                          Fcons (Fcurrent_buffer (), workbuf));
7079   return workbuf;
7080 }
7081
7082 int
7083 decode_coding_gap (coding, chars, bytes)
7084      struct coding_system *coding;
7085      EMACS_INT chars, bytes;
7086 {
7087   int count = specpdl_ptr - specpdl;
7088   Lisp_Object attrs;
7089
7090   code_conversion_save (0, 0);
7091
7092   coding->src_object = Fcurrent_buffer ();
7093   coding->src_chars = chars;
7094   coding->src_bytes = bytes;
7095   coding->src_pos = -chars;
7096   coding->src_pos_byte = -bytes;
7097   coding->src_multibyte = chars < bytes;
7098   coding->dst_object = coding->src_object;
7099   coding->dst_pos = PT;
7100   coding->dst_pos_byte = PT_BYTE;
7101   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7102
7103   if (CODING_REQUIRE_DETECTION (coding))
7104     detect_coding (coding);
7105
7106   coding->mode |= CODING_MODE_LAST_BLOCK;
7107   current_buffer->text->inhibit_shrinking = 1;
7108   decode_coding (coding);
7109   current_buffer->text->inhibit_shrinking = 0;
7110
7111   attrs = CODING_ID_ATTRS (coding->id);
7112   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7113     {
7114       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7115       Lisp_Object val;
7116
7117       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7118       val = call1 (CODING_ATTR_POST_READ (attrs),
7119                    make_number (coding->produced_char));
7120       CHECK_NATNUM (val);
7121       coding->produced_char += Z - prev_Z;
7122       coding->produced += Z_BYTE - prev_Z_BYTE;
7123     }
7124
7125   unbind_to (count, Qnil);
7126   return coding->result;
7127 }
7128
7129 int
7130 encode_coding_gap (coding, chars, bytes)
7131      struct coding_system *coding;
7132      EMACS_INT chars, bytes;
7133 {
7134   int count = specpdl_ptr - specpdl;
7135
7136   code_conversion_save (0, 0);
7137
7138   coding->src_object = Fcurrent_buffer ();
7139   coding->src_chars = chars;
7140   coding->src_bytes = bytes;
7141   coding->src_pos = -chars;
7142   coding->src_pos_byte = -bytes;
7143   coding->src_multibyte = chars < bytes;
7144   coding->dst_object = coding->src_object;
7145   coding->dst_pos = PT;
7146   coding->dst_pos_byte = PT_BYTE;
7147
7148   encode_coding (coding);
7149
7150   unbind_to (count, Qnil);
7151   return coding->result;
7152 }
7153
7154
7155 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7156    SRC_OBJECT into DST_OBJECT by coding context CODING.
7157
7158    SRC_OBJECT is a buffer, a string, or Qnil.
7159
7160    If it is a buffer, the text is at point of the buffer.  FROM and TO
7161    are positions in the buffer.
7162
7163    If it is a string, the text is at the beginning of the string.
7164    FROM and TO are indices to the string.
7165
7166    If it is nil, the text is at coding->source.  FROM and TO are
7167    indices to coding->source.
7168
7169    DST_OBJECT is a buffer, Qt, or Qnil.
7170
7171    If it is a buffer, the decoded text is inserted at point of the
7172    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7173    is deleted.
7174
7175    If it is Qt, a string is made from the decoded text, and
7176    set in CODING->dst_object.
7177
7178    If it is Qnil, the decoded text is stored at CODING->destination.
7179    The caller must allocate CODING->dst_bytes bytes at
7180    CODING->destination by xmalloc.  If the decoded text is longer than
7181    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7182  */
7183
7184 void
7185 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7186                       dst_object)
7187      struct coding_system *coding;
7188      Lisp_Object src_object;
7189      EMACS_INT from, from_byte, to, to_byte;
7190      Lisp_Object dst_object;
7191 {
7192   int count = specpdl_ptr - specpdl;
7193   unsigned char *destination;
7194   EMACS_INT dst_bytes;
7195   EMACS_INT chars = to - from;
7196   EMACS_INT bytes = to_byte - from_byte;
7197   Lisp_Object attrs;
7198   int saved_pt = -1, saved_pt_byte;
7199   int need_marker_adjustment = 0;
7200   Lisp_Object old_deactivate_mark;
7201
7202   old_deactivate_mark = Vdeactivate_mark;
7203
7204   if (NILP (dst_object))
7205     {
7206       destination = coding->destination;
7207       dst_bytes = coding->dst_bytes;
7208     }
7209
7210   coding->src_object = src_object;
7211   coding->src_chars = chars;
7212   coding->src_bytes = bytes;
7213   coding->src_multibyte = chars < bytes;
7214
7215   if (STRINGP (src_object))
7216     {
7217       coding->src_pos = from;
7218       coding->src_pos_byte = from_byte;
7219     }
7220   else if (BUFFERP (src_object))
7221     {
7222       set_buffer_internal (XBUFFER (src_object));
7223       if (from != GPT)
7224         move_gap_both (from, from_byte);
7225       if (EQ (src_object, dst_object))
7226         {
7227           struct Lisp_Marker *tail;
7228
7229           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7230             {
7231               tail->need_adjustment
7232                 = tail->charpos == (tail->insertion_type ? from : to);
7233               need_marker_adjustment |= tail->need_adjustment;
7234             }
7235           saved_pt = PT, saved_pt_byte = PT_BYTE;
7236           TEMP_SET_PT_BOTH (from, from_byte);
7237           current_buffer->text->inhibit_shrinking = 1;
7238           del_range_both (from, from_byte, to, to_byte, 1);
7239           coding->src_pos = -chars;
7240           coding->src_pos_byte = -bytes;
7241         }
7242       else
7243         {
7244           coding->src_pos = from;
7245           coding->src_pos_byte = from_byte;
7246         }
7247     }
7248
7249   if (CODING_REQUIRE_DETECTION (coding))
7250     detect_coding (coding);
7251   attrs = CODING_ID_ATTRS (coding->id);
7252
7253   if (EQ (dst_object, Qt)
7254       || (! NILP (CODING_ATTR_POST_READ (attrs))
7255           && NILP (dst_object)))
7256     {
7257       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7258       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7259       coding->dst_pos = BEG;
7260       coding->dst_pos_byte = BEG_BYTE;
7261     }
7262   else if (BUFFERP (dst_object))
7263     {
7264       code_conversion_save (0, 0);
7265       coding->dst_object = dst_object;
7266       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7267       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7268       coding->dst_multibyte
7269         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7270     }
7271   else
7272     {
7273       code_conversion_save (0, 0);
7274       coding->dst_object = Qnil;
7275       /* Most callers presume this will return a multibyte result, and they
7276          won't use `binary' or `raw-text' anyway, so let's not worry about
7277          CODING_FOR_UNIBYTE.  */
7278       coding->dst_multibyte = 1;
7279     }
7280
7281   decode_coding (coding);
7282
7283   if (BUFFERP (coding->dst_object))
7284     set_buffer_internal (XBUFFER (coding->dst_object));
7285
7286   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7287     {
7288       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7289       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7290       Lisp_Object val;
7291
7292       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7293       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7294               old_deactivate_mark);
7295       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7296                         make_number (coding->produced_char));
7297       UNGCPRO;
7298       CHECK_NATNUM (val);
7299       coding->produced_char += Z - prev_Z;
7300       coding->produced += Z_BYTE - prev_Z_BYTE;
7301     }
7302
7303   if (EQ (dst_object, Qt))
7304     {
7305       coding->dst_object = Fbuffer_string ();
7306     }
7307   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7308     {
7309       set_buffer_internal (XBUFFER (coding->dst_object));
7310       if (dst_bytes < coding->produced)
7311         {
7312           destination = xrealloc (destination, coding->produced);
7313           if (! destination)
7314             {
7315               record_conversion_result (coding,
7316                                         CODING_RESULT_INSUFFICIENT_DST);
7317               unbind_to (count, Qnil);
7318               return;
7319             }
7320           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7321             move_gap_both (BEGV, BEGV_BYTE);
7322           bcopy (BEGV_ADDR, destination, coding->produced);
7323           coding->destination = destination;
7324         }
7325     }
7326
7327   if (saved_pt >= 0)
7328     {
7329       /* This is the case of:
7330          (BUFFERP (src_object) && EQ (src_object, dst_object))
7331          As we have moved PT while replacing the original buffer
7332          contents, we must recover it now.  */
7333       set_buffer_internal (XBUFFER (src_object));
7334       current_buffer->text->inhibit_shrinking = 0;
7335       if (saved_pt < from)
7336         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7337       else if (saved_pt < from + chars)
7338         TEMP_SET_PT_BOTH (from, from_byte);
7339       else if (! NILP (current_buffer->enable_multibyte_characters))
7340         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7341                           saved_pt_byte + (coding->produced - bytes));
7342       else
7343         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7344                           saved_pt_byte + (coding->produced - bytes));
7345
7346       if (need_marker_adjustment)
7347         {
7348           struct Lisp_Marker *tail;
7349
7350           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7351             if (tail->need_adjustment)
7352               {
7353                 tail->need_adjustment = 0;
7354                 if (tail->insertion_type)
7355                   {
7356                     tail->bytepos = from_byte;
7357                     tail->charpos = from;
7358                   }
7359                 else
7360                   {
7361                     tail->bytepos = from_byte + coding->produced;
7362                     tail->charpos
7363                       = (NILP (current_buffer->enable_multibyte_characters)
7364                          ? tail->bytepos : from + coding->produced_char);
7365                   }
7366               }
7367         }
7368     }
7369
7370   Vdeactivate_mark = old_deactivate_mark;
7371   unbind_to (count, coding->dst_object);
7372 }
7373
7374
7375 void
7376 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7377                       dst_object)
7378      struct coding_system *coding;
7379      Lisp_Object src_object;
7380      EMACS_INT from, from_byte, to, to_byte;
7381      Lisp_Object dst_object;
7382 {
7383   int count = specpdl_ptr - specpdl;
7384   EMACS_INT chars = to - from;
7385   EMACS_INT bytes = to_byte - from_byte;
7386   Lisp_Object attrs;
7387   int saved_pt = -1, saved_pt_byte;
7388   int need_marker_adjustment = 0;
7389   int kill_src_buffer = 0;
7390   Lisp_Object old_deactivate_mark;
7391
7392   old_deactivate_mark = Vdeactivate_mark;
7393
7394   coding->src_object = src_object;
7395   coding->src_chars = chars;
7396   coding->src_bytes = bytes;
7397   coding->src_multibyte = chars < bytes;
7398
7399   attrs = CODING_ID_ATTRS (coding->id);
7400
7401   if (EQ (src_object, dst_object))
7402     {
7403       struct Lisp_Marker *tail;
7404
7405       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7406         {
7407           tail->need_adjustment
7408             = tail->charpos == (tail->insertion_type ? from : to);
7409           need_marker_adjustment |= tail->need_adjustment;
7410         }
7411     }
7412
7413   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7414     {
7415       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7416       set_buffer_internal (XBUFFER (coding->src_object));
7417       if (STRINGP (src_object))
7418         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7419       else if (BUFFERP (src_object))
7420         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7421       else
7422         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7423
7424       if (EQ (src_object, dst_object))
7425         {
7426           set_buffer_internal (XBUFFER (src_object));
7427           saved_pt = PT, saved_pt_byte = PT_BYTE;
7428           del_range_both (from, from_byte, to, to_byte, 1);
7429           set_buffer_internal (XBUFFER (coding->src_object));
7430         }
7431
7432       {
7433         Lisp_Object args[3];
7434         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7435
7436         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7437                 old_deactivate_mark);
7438         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7439         args[1] = make_number (BEG);
7440         args[2] = make_number (Z);
7441         safe_call (3, args);
7442         UNGCPRO;
7443       }
7444       if (XBUFFER (coding->src_object) != current_buffer)
7445         kill_src_buffer = 1;
7446       coding->src_object = Fcurrent_buffer ();
7447       if (BEG != GPT)
7448         move_gap_both (BEG, BEG_BYTE);
7449       coding->src_chars = Z - BEG;
7450       coding->src_bytes = Z_BYTE - BEG_BYTE;
7451       coding->src_pos = BEG;
7452       coding->src_pos_byte = BEG_BYTE;
7453       coding->src_multibyte = Z < Z_BYTE;
7454     }
7455   else if (STRINGP (src_object))
7456     {
7457       code_conversion_save (0, 0);
7458       coding->src_pos = from;
7459       coding->src_pos_byte = from_byte;
7460     }
7461   else if (BUFFERP (src_object))
7462     {
7463       code_conversion_save (0, 0);
7464       set_buffer_internal (XBUFFER (src_object));
7465       if (EQ (src_object, dst_object))
7466         {
7467           saved_pt = PT, saved_pt_byte = PT_BYTE;
7468           coding->src_object = del_range_1 (from, to, 1, 1);
7469           coding->src_pos = 0;
7470           coding->src_pos_byte = 0;
7471         }
7472       else
7473         {
7474           if (from < GPT && to >= GPT)
7475             move_gap_both (from, from_byte);
7476           coding->src_pos = from;
7477           coding->src_pos_byte = from_byte;
7478         }
7479     }
7480   else
7481     code_conversion_save (0, 0);
7482
7483   if (BUFFERP (dst_object))
7484     {
7485       coding->dst_object = dst_object;
7486       if (EQ (src_object, dst_object))
7487         {
7488           coding->dst_pos = from;
7489           coding->dst_pos_byte = from_byte;
7490         }
7491       else
7492         {
7493           struct buffer *current = current_buffer;
7494
7495           set_buffer_temp (XBUFFER (dst_object));
7496           coding->dst_pos = PT;
7497           coding->dst_pos_byte = PT_BYTE;
7498           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7499           set_buffer_temp (current);
7500         }
7501       coding->dst_multibyte
7502         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7503     }
7504   else if (EQ (dst_object, Qt))
7505     {
7506       coding->dst_object = Qnil;
7507       coding->dst_bytes = coding->src_chars;
7508       if (coding->dst_bytes == 0)
7509         coding->dst_bytes = 1;
7510       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7511       coding->dst_multibyte = 0;
7512     }
7513   else
7514     {
7515       coding->dst_object = Qnil;
7516       coding->dst_multibyte = 0;
7517     }
7518
7519   encode_coding (coding);
7520
7521   if (EQ (dst_object, Qt))
7522     {
7523       if (BUFFERP (coding->dst_object))
7524         coding->dst_object = Fbuffer_string ();
7525       else
7526         {
7527           coding->dst_object
7528             = make_unibyte_string ((char *) coding->destination,
7529                                    coding->produced);
7530           xfree (coding->destination);
7531         }
7532     }
7533
7534   if (saved_pt >= 0)
7535     {
7536       /* This is the case of:
7537          (BUFFERP (src_object) && EQ (src_object, dst_object))
7538          As we have moved PT while replacing the original buffer
7539          contents, we must recover it now.  */
7540       set_buffer_internal (XBUFFER (src_object));
7541       if (saved_pt < from)
7542         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7543       else if (saved_pt < from + chars)
7544         TEMP_SET_PT_BOTH (from, from_byte);
7545       else if (! NILP (current_buffer->enable_multibyte_characters))
7546         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7547                           saved_pt_byte + (coding->produced - bytes));
7548       else
7549         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7550                           saved_pt_byte + (coding->produced - bytes));
7551
7552       if (need_marker_adjustment)
7553         {
7554           struct Lisp_Marker *tail;
7555
7556           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7557             if (tail->need_adjustment)
7558               {
7559                 tail->need_adjustment = 0;
7560                 if (tail->insertion_type)
7561                   {
7562                     tail->bytepos = from_byte;
7563                     tail->charpos = from;
7564                   }
7565                 else
7566                   {
7567                     tail->bytepos = from_byte + coding->produced;
7568                     tail->charpos
7569                       = (NILP (current_buffer->enable_multibyte_characters)
7570                          ? tail->bytepos : from + coding->produced_char);
7571                   }
7572               }
7573         }
7574     }
7575
7576   if (kill_src_buffer)
7577     Fkill_buffer (coding->src_object);
7578
7579   Vdeactivate_mark = old_deactivate_mark;
7580   unbind_to (count, Qnil);
7581 }
7582
7583
7584 Lisp_Object
7585 preferred_coding_system ()
7586 {
7587   int id = coding_categories[coding_priorities[0]].id;
7588
7589   return CODING_ID_NAME (id);
7590 }
7591
7592 \f
7593 #ifdef emacs
7594 /*** 11. Emacs Lisp library functions ***/
7595
7596 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
7597        doc: /* Return t if OBJECT is nil or a coding-system.
7598 See the documentation of `define-coding-system' for information
7599 about coding-system objects.  */)
7600      (object)
7601      Lisp_Object object;
7602 {
7603   if (NILP (object)
7604       || CODING_SYSTEM_ID (object) >= 0)
7605     return Qt;
7606   if (! SYMBOLP (object)
7607       || NILP (Fget (object, Qcoding_system_define_form)))
7608     return Qnil;
7609   return Qt;
7610 }
7611
7612 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7613        Sread_non_nil_coding_system, 1, 1, 0,
7614        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7615      (prompt)
7616      Lisp_Object prompt;
7617 {
7618   Lisp_Object val;
7619   do
7620     {
7621       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7622                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
7623     }
7624   while (SCHARS (val) == 0);
7625   return (Fintern (val, Qnil));
7626 }
7627
7628 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7629        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7630 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7631 Ignores case when completing coding systems (all Emacs coding systems
7632 are lower-case).  */)
7633      (prompt, default_coding_system)
7634      Lisp_Object prompt, default_coding_system;
7635 {
7636   Lisp_Object val;
7637   int count = SPECPDL_INDEX ();
7638
7639   if (SYMBOLP (default_coding_system))
7640     default_coding_system = SYMBOL_NAME (default_coding_system);
7641   specbind (Qcompletion_ignore_case, Qt);
7642   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7643                           Qt, Qnil, Qcoding_system_history,
7644                           default_coding_system, Qnil);
7645   unbind_to (count, Qnil);
7646   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
7647 }
7648
7649 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7650        1, 1, 0,
7651        doc: /* Check validity of CODING-SYSTEM.
7652 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7653 It is valid if it is nil or a symbol defined as a coding system by the
7654 function `define-coding-system'.  */)
7655   (coding_system)
7656      Lisp_Object coding_system;
7657 {
7658   Lisp_Object define_form;
7659
7660   define_form = Fget (coding_system, Qcoding_system_define_form);
7661   if (! NILP (define_form))
7662     {
7663       Fput (coding_system, Qcoding_system_define_form, Qnil);
7664       safe_eval (define_form);
7665     }
7666   if (!NILP (Fcoding_system_p (coding_system)))
7667     return coding_system;
7668   xsignal1 (Qcoding_system_error, coding_system);
7669 }
7670
7671 \f
7672 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7673    HIGHEST is nonzero, return the coding system of the highest
7674    priority among the detected coding systems.  Otherwize return a
7675    list of detected coding systems sorted by their priorities.  If
7676    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7677    multibyte form but contains only ASCII and eight-bit chars.
7678    Otherwise, the bytes are raw bytes.
7679
7680    CODING-SYSTEM controls the detection as below:
7681
7682    If it is nil, detect both text-format and eol-format.  If the
7683    text-format part of CODING-SYSTEM is already specified
7684    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7685    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7686    detect only text-format.  */
7687
7688 Lisp_Object
7689 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7690                       coding_system)
7691      const unsigned char *src;
7692      EMACS_INT src_chars, src_bytes;
7693      int highest;
7694      int multibytep;
7695      Lisp_Object coding_system;
7696 {
7697   const unsigned char *src_end = src + src_bytes;
7698   Lisp_Object attrs, eol_type;
7699   Lisp_Object val = Qnil;
7700   struct coding_system coding;
7701   int id;
7702   struct coding_detection_info detect_info;
7703   enum coding_category base_category;
7704   int null_byte_found = 0, eight_bit_found = 0;
7705
7706   if (NILP (coding_system))
7707     coding_system = Qundecided;
7708   setup_coding_system (coding_system, &coding);
7709   attrs = CODING_ID_ATTRS (coding.id);
7710   eol_type = CODING_ID_EOL_TYPE (coding.id);
7711   coding_system = CODING_ATTR_BASE_NAME (attrs);
7712
7713   coding.source = src;
7714   coding.src_chars = src_chars;
7715   coding.src_bytes = src_bytes;
7716   coding.src_multibyte = multibytep;
7717   coding.consumed = 0;
7718   coding.mode |= CODING_MODE_LAST_BLOCK;
7719   coding.head_ascii = 0;
7720
7721   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7722
7723   /* At first, detect text-format if necessary.  */
7724   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7725   if (base_category == coding_category_undecided)
7726     {
7727       enum coding_category category;
7728       struct coding_system *this;
7729       int c, i;
7730
7731       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7732       for (; src < src_end; src++)
7733         {
7734           c = *src;
7735           if (c & 0x80)
7736             {
7737               eight_bit_found = 1;
7738               if (null_byte_found)
7739                 break;
7740             }
7741           else if (c < 0x20)
7742             {
7743               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7744                   && ! inhibit_iso_escape_detection
7745                   && ! detect_info.checked)
7746                 {
7747                   if (detect_coding_iso_2022 (&coding, &detect_info))
7748                     {
7749                       /* We have scanned the whole data.  */
7750                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7751                         {
7752                           /* We didn't find an 8-bit code.  We may
7753                              have found a null-byte, but it's very
7754                              rare that a binary file confirm to
7755                              ISO-2022.  */
7756                           src = src_end;
7757                           coding.head_ascii = src - coding.source;
7758                         }
7759                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
7760                       break;
7761                     }
7762                 }
7763               else if (! c)
7764                 {
7765                   null_byte_found = 1;
7766                   if (eight_bit_found)
7767                     break;
7768                 }
7769               if (! eight_bit_found)
7770                 coding.head_ascii++;
7771             }
7772           else if (! eight_bit_found)
7773             coding.head_ascii++;
7774         }
7775
7776       if (null_byte_found || eight_bit_found
7777           || coding.head_ascii < coding.src_bytes
7778           || detect_info.found)
7779         {
7780           if (coding.head_ascii == coding.src_bytes)
7781             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
7782             for (i = 0; i < coding_category_raw_text; i++)
7783               {
7784                 category = coding_priorities[i];
7785                 this = coding_categories + category;
7786                 if (detect_info.found & (1 << category))
7787                   break;
7788               }
7789           else
7790             {
7791               if (null_byte_found)
7792                 {
7793                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
7794                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
7795                 }
7796               for (i = 0; i < coding_category_raw_text; i++)
7797                 {
7798                   category = coding_priorities[i];
7799                   this = coding_categories + category;
7800
7801                   if (this->id < 0)
7802                     {
7803                       /* No coding system of this category is defined.  */
7804                       detect_info.rejected |= (1 << category);
7805                     }
7806                   else if (category >= coding_category_raw_text)
7807                     continue;
7808                   else if (detect_info.checked & (1 << category))
7809                     {
7810                       if (highest
7811                           && (detect_info.found & (1 << category)))
7812                         break;
7813                     }
7814                   else if ((*(this->detector)) (&coding, &detect_info)
7815                            && highest
7816                            && (detect_info.found & (1 << category)))
7817                     {
7818                       if (category == coding_category_utf_16_auto)
7819                         {
7820                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7821                             category = coding_category_utf_16_le;
7822                           else
7823                             category = coding_category_utf_16_be;
7824                         }
7825                       break;
7826                     }
7827                 }
7828             }
7829         }
7830
7831       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
7832         {
7833           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7834           id = coding_categories[coding_category_raw_text].id;
7835           val = Fcons (make_number (id), Qnil);
7836         }
7837       else if (! detect_info.rejected && ! detect_info.found)
7838         {
7839           detect_info.found = CATEGORY_MASK_ANY;
7840           id = coding_categories[coding_category_undecided].id;
7841           val = Fcons (make_number (id), Qnil);
7842         }
7843       else if (highest)
7844         {
7845           if (detect_info.found)
7846             {
7847               detect_info.found = 1 << category;
7848               val = Fcons (make_number (this->id), Qnil);
7849             }
7850           else
7851             for (i = 0; i < coding_category_raw_text; i++)
7852               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7853                 {
7854                   detect_info.found = 1 << coding_priorities[i];
7855                   id = coding_categories[coding_priorities[i]].id;
7856                   val = Fcons (make_number (id), Qnil);
7857                   break;
7858                 }
7859         }
7860       else
7861         {
7862           int mask = detect_info.rejected | detect_info.found;
7863           int found = 0;
7864
7865           for (i = coding_category_raw_text - 1; i >= 0; i--)
7866             {
7867               category = coding_priorities[i];
7868               if (! (mask & (1 << category)))
7869                 {
7870                   found |= 1 << category;
7871                   id = coding_categories[category].id;
7872                   if (id >= 0)
7873                     val = Fcons (make_number (id), val);
7874                 }
7875             }
7876           for (i = coding_category_raw_text - 1; i >= 0; i--)
7877             {
7878               category = coding_priorities[i];
7879               if (detect_info.found & (1 << category))
7880                 {
7881                   id = coding_categories[category].id;
7882                   val = Fcons (make_number (id), val);
7883                 }
7884             }
7885           detect_info.found |= found;
7886         }
7887     }
7888   else if (base_category == coding_category_utf_8_auto)
7889     {
7890       if (detect_coding_utf_8 (&coding, &detect_info))
7891         {
7892           struct coding_system *this;
7893
7894           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
7895             this = coding_categories + coding_category_utf_8_sig;
7896           else
7897             this = coding_categories + coding_category_utf_8_nosig;
7898           val = Fcons (make_number (this->id), Qnil);
7899         }
7900     }
7901   else if (base_category == coding_category_utf_16_auto)
7902     {
7903       if (detect_coding_utf_16 (&coding, &detect_info))
7904         {
7905           struct coding_system *this;
7906
7907           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7908             this = coding_categories + coding_category_utf_16_le;
7909           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7910             this = coding_categories + coding_category_utf_16_be;
7911           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7912             this = coding_categories + coding_category_utf_16_be_nosig;
7913           else
7914             this = coding_categories + coding_category_utf_16_le_nosig;
7915           val = Fcons (make_number (this->id), Qnil);
7916         }
7917     }
7918   else
7919     {
7920       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7921       val = Fcons (make_number (coding.id), Qnil);
7922     }
7923
7924   /* Then, detect eol-format if necessary.  */
7925   {
7926     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
7927     Lisp_Object tail;
7928
7929     if (VECTORP (eol_type))
7930       {
7931         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7932           {
7933             if (null_byte_found)
7934               normal_eol = EOL_SEEN_LF;
7935             else
7936               normal_eol = detect_eol (coding.source, src_bytes,
7937                                        coding_category_raw_text);
7938           }
7939         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7940                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7941           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7942                                       coding_category_utf_16_be);
7943         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7944                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7945           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7946                                       coding_category_utf_16_le);
7947       }
7948     else
7949       {
7950         if (EQ (eol_type, Qunix))
7951           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7952         else if (EQ (eol_type, Qdos))
7953           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7954         else
7955           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7956       }
7957
7958     for (tail = val; CONSP (tail); tail = XCDR (tail))
7959       {
7960         enum coding_category category;
7961         int this_eol;
7962
7963         id = XINT (XCAR (tail));
7964         attrs = CODING_ID_ATTRS (id);
7965         category = XINT (CODING_ATTR_CATEGORY (attrs));
7966         eol_type = CODING_ID_EOL_TYPE (id);
7967         if (VECTORP (eol_type))
7968           {
7969             if (category == coding_category_utf_16_be
7970                 || category == coding_category_utf_16_be_nosig)
7971               this_eol = utf_16_be_eol;
7972             else if (category == coding_category_utf_16_le
7973                      || category == coding_category_utf_16_le_nosig)
7974               this_eol = utf_16_le_eol;
7975             else
7976               this_eol = normal_eol;
7977
7978             if (this_eol == EOL_SEEN_LF)
7979               XSETCAR (tail, AREF (eol_type, 0));
7980             else if (this_eol == EOL_SEEN_CRLF)
7981               XSETCAR (tail, AREF (eol_type, 1));
7982             else if (this_eol == EOL_SEEN_CR)
7983               XSETCAR (tail, AREF (eol_type, 2));
7984             else
7985               XSETCAR (tail, CODING_ID_NAME (id));
7986           }
7987         else
7988           XSETCAR (tail, CODING_ID_NAME (id));
7989       }
7990   }
7991
7992   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
7993 }
7994
7995
7996 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7997        2, 3, 0,
7998        doc: /* Detect coding system of the text in the region between START and END.
7999 Return a list of possible coding systems ordered by priority.
8000
8001 If only ASCII characters are found (except for such ISO-2022 control
8002 characters as ESC), it returns a list of single element `undecided'
8003 or its subsidiary coding system according to a detected end-of-line
8004 format.
8005
8006 If optional argument HIGHEST is non-nil, return the coding system of
8007 highest priority.  */)
8008      (start, end, highest)
8009      Lisp_Object start, end, highest;
8010 {
8011   int from, to;
8012   int from_byte, to_byte;
8013
8014   CHECK_NUMBER_COERCE_MARKER (start);
8015   CHECK_NUMBER_COERCE_MARKER (end);
8016
8017   validate_region (&start, &end);
8018   from = XINT (start), to = XINT (end);
8019   from_byte = CHAR_TO_BYTE (from);
8020   to_byte = CHAR_TO_BYTE (to);
8021
8022   if (from < GPT && to >= GPT)
8023     move_gap_both (to, to_byte);
8024
8025   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8026                                to - from, to_byte - from_byte,
8027                                !NILP (highest),
8028                                !NILP (current_buffer
8029                                       ->enable_multibyte_characters),
8030                                Qnil);
8031 }
8032
8033 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8034        1, 2, 0,
8035        doc: /* Detect coding system of the text in STRING.
8036 Return a list of possible coding systems ordered by priority.
8037
8038 If only ASCII characters are found (except for such ISO-2022 control
8039 characters as ESC), it returns a list of single element `undecided'
8040 or its subsidiary coding system according to a detected end-of-line
8041 format.
8042
8043 If optional argument HIGHEST is non-nil, return the coding system of
8044 highest priority.  */)
8045      (string, highest)
8046      Lisp_Object string, highest;
8047 {
8048   CHECK_STRING (string);
8049
8050   return detect_coding_system (SDATA (string),
8051                                SCHARS (string), SBYTES (string),
8052                                !NILP (highest), STRING_MULTIBYTE (string),
8053                                Qnil);
8054 }
8055
8056
8057 static INLINE int
8058 char_encodable_p (c, attrs)
8059      int c;
8060      Lisp_Object attrs;
8061 {
8062   Lisp_Object tail;
8063   struct charset *charset;
8064   Lisp_Object translation_table;
8065
8066   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8067   if (! NILP (translation_table))
8068     c = translate_char (translation_table, c);
8069   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8070        CONSP (tail); tail = XCDR (tail))
8071     {
8072       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8073       if (CHAR_CHARSET_P (c, charset))
8074         break;
8075     }
8076   return (! NILP (tail));
8077 }
8078
8079
8080 /* Return a list of coding systems that safely encode the text between
8081    START and END.  If EXCLUDE is non-nil, it is a list of coding
8082    systems not to check.  The returned list doesn't contain any such
8083    coding systems.  In any case, if the text contains only ASCII or is
8084    unibyte, return t.  */
8085
8086 DEFUN ("find-coding-systems-region-internal",
8087        Ffind_coding_systems_region_internal,
8088        Sfind_coding_systems_region_internal, 2, 3, 0,
8089        doc: /* Internal use only.  */)
8090      (start, end, exclude)
8091      Lisp_Object start, end, exclude;
8092 {
8093   Lisp_Object coding_attrs_list, safe_codings;
8094   EMACS_INT start_byte, end_byte;
8095   const unsigned char *p, *pbeg, *pend;
8096   int c;
8097   Lisp_Object tail, elt;
8098
8099   if (STRINGP (start))
8100     {
8101       if (!STRING_MULTIBYTE (start)
8102           || SCHARS (start) == SBYTES (start))
8103         return Qt;
8104       start_byte = 0;
8105       end_byte = SBYTES (start);
8106     }
8107   else
8108     {
8109       CHECK_NUMBER_COERCE_MARKER (start);
8110       CHECK_NUMBER_COERCE_MARKER (end);
8111       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8112         args_out_of_range (start, end);
8113       if (NILP (current_buffer->enable_multibyte_characters))
8114         return Qt;
8115       start_byte = CHAR_TO_BYTE (XINT (start));
8116       end_byte = CHAR_TO_BYTE (XINT (end));
8117       if (XINT (end) - XINT (start) == end_byte - start_byte)
8118         return Qt;
8119
8120       if (XINT (start) < GPT && XINT (end) > GPT)
8121         {
8122           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8123             move_gap_both (XINT (start), start_byte);
8124           else
8125             move_gap_both (XINT (end), end_byte);
8126         }
8127     }
8128
8129   coding_attrs_list = Qnil;
8130   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8131     if (NILP (exclude)
8132         || NILP (Fmemq (XCAR (tail), exclude)))
8133       {
8134         Lisp_Object attrs;
8135
8136         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8137         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8138             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8139           {
8140             ASET (attrs, coding_attr_trans_tbl,
8141                   get_translation_table (attrs, 1, NULL));
8142             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8143           }
8144       }
8145
8146   if (STRINGP (start))
8147     p = pbeg = SDATA (start);
8148   else
8149     p = pbeg = BYTE_POS_ADDR (start_byte);
8150   pend = p + (end_byte - start_byte);
8151
8152   while (p < pend && ASCII_BYTE_P (*p)) p++;
8153   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8154
8155   while (p < pend)
8156     {
8157       if (ASCII_BYTE_P (*p))
8158         p++;
8159       else
8160         {
8161           c = STRING_CHAR_ADVANCE (p);
8162
8163           charset_map_loaded = 0;
8164           for (tail = coding_attrs_list; CONSP (tail);)
8165             {
8166               elt = XCAR (tail);
8167               if (NILP (elt))
8168                 tail = XCDR (tail);
8169               else if (char_encodable_p (c, elt))
8170                 tail = XCDR (tail);
8171               else if (CONSP (XCDR (tail)))
8172                 {
8173                   XSETCAR (tail, XCAR (XCDR (tail)));
8174                   XSETCDR (tail, XCDR (XCDR (tail)));
8175                 }
8176               else
8177                 {
8178                   XSETCAR (tail, Qnil);
8179                   tail = XCDR (tail);
8180                 }
8181             }
8182           if (charset_map_loaded)
8183             {
8184               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8185
8186               if (STRINGP (start))
8187                 pbeg = SDATA (start);
8188               else
8189                 pbeg = BYTE_POS_ADDR (start_byte);
8190               p = pbeg + p_offset;
8191               pend = pbeg + pend_offset;
8192             }
8193         }
8194     }
8195
8196   safe_codings = list2 (Qraw_text, Qno_conversion);
8197   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8198     if (! NILP (XCAR (tail)))
8199       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8200
8201   return safe_codings;
8202 }
8203
8204
8205 DEFUN ("unencodable-char-position", Funencodable_char_position,
8206        Sunencodable_char_position, 3, 5, 0,
8207        doc: /*
8208 Return position of first un-encodable character in a region.
8209 START and END specify the region and CODING-SYSTEM specifies the
8210 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8211
8212 If optional 4th argument COUNT is non-nil, it specifies at most how
8213 many un-encodable characters to search.  In this case, the value is a
8214 list of positions.
8215
8216 If optional 5th argument STRING is non-nil, it is a string to search
8217 for un-encodable characters.  In that case, START and END are indexes
8218 to the string.  */)
8219      (start, end, coding_system, count, string)
8220      Lisp_Object start, end, coding_system, count, string;
8221 {
8222   int n;
8223   struct coding_system coding;
8224   Lisp_Object attrs, charset_list, translation_table;
8225   Lisp_Object positions;
8226   int from, to;
8227   const unsigned char *p, *stop, *pend;
8228   int ascii_compatible;
8229
8230   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8231   attrs = CODING_ID_ATTRS (coding.id);
8232   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8233     return Qnil;
8234   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8235   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8236   translation_table = get_translation_table (attrs, 1, NULL);
8237
8238   if (NILP (string))
8239     {
8240       validate_region (&start, &end);
8241       from = XINT (start);
8242       to = XINT (end);
8243       if (NILP (current_buffer->enable_multibyte_characters)
8244           || (ascii_compatible
8245               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8246         return Qnil;
8247       p = CHAR_POS_ADDR (from);
8248       pend = CHAR_POS_ADDR (to);
8249       if (from < GPT && to >= GPT)
8250         stop = GPT_ADDR;
8251       else
8252         stop = pend;
8253     }
8254   else
8255     {
8256       CHECK_STRING (string);
8257       CHECK_NATNUM (start);
8258       CHECK_NATNUM (end);
8259       from = XINT (start);
8260       to = XINT (end);
8261       if (from > to
8262           || to > SCHARS (string))
8263         args_out_of_range_3 (string, start, end);
8264       if (! STRING_MULTIBYTE (string))
8265         return Qnil;
8266       p = SDATA (string) + string_char_to_byte (string, from);
8267       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8268       if (ascii_compatible && (to - from) == (pend - p))
8269         return Qnil;
8270     }
8271
8272   if (NILP (count))
8273     n = 1;
8274   else
8275     {
8276       CHECK_NATNUM (count);
8277       n = XINT (count);
8278     }
8279
8280   positions = Qnil;
8281   while (1)
8282     {
8283       int c;
8284
8285       if (ascii_compatible)
8286         while (p < stop && ASCII_BYTE_P (*p))
8287           p++, from++;
8288       if (p >= stop)
8289         {
8290           if (p >= pend)
8291             break;
8292           stop = pend;
8293           p = GAP_END_ADDR;
8294         }
8295
8296       c = STRING_CHAR_ADVANCE (p);
8297       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8298           && ! char_charset (translate_char (translation_table, c),
8299                              charset_list, NULL))
8300         {
8301           positions = Fcons (make_number (from), positions);
8302           n--;
8303           if (n == 0)
8304             break;
8305         }
8306
8307       from++;
8308     }
8309
8310   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8311 }
8312
8313
8314 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8315        Scheck_coding_systems_region, 3, 3, 0,
8316        doc: /* Check if the region is encodable by coding systems.
8317
8318 START and END are buffer positions specifying the region.
8319 CODING-SYSTEM-LIST is a list of coding systems to check.
8320
8321 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8322 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8323 whole region, POS0, POS1, ... are buffer positions where non-encodable
8324 characters are found.
8325
8326 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8327 value is nil.
8328
8329 START may be a string.  In that case, check if the string is
8330 encodable, and the value contains indices to the string instead of
8331 buffer positions.  END is ignored.  */)
8332      (start, end, coding_system_list)
8333      Lisp_Object start, end, coding_system_list;
8334 {
8335   Lisp_Object list;
8336   EMACS_INT start_byte, end_byte;
8337   int pos;
8338   const unsigned char *p, *pbeg, *pend;
8339   int c;
8340   Lisp_Object tail, elt, attrs;
8341
8342   if (STRINGP (start))
8343     {
8344       if (!STRING_MULTIBYTE (start)
8345           && SCHARS (start) != SBYTES (start))
8346         return Qnil;
8347       start_byte = 0;
8348       end_byte = SBYTES (start);
8349       pos = 0;
8350     }
8351   else
8352     {
8353       CHECK_NUMBER_COERCE_MARKER (start);
8354       CHECK_NUMBER_COERCE_MARKER (end);
8355       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8356         args_out_of_range (start, end);
8357       if (NILP (current_buffer->enable_multibyte_characters))
8358         return Qnil;
8359       start_byte = CHAR_TO_BYTE (XINT (start));
8360       end_byte = CHAR_TO_BYTE (XINT (end));
8361       if (XINT (end) - XINT (start) == end_byte - start_byte)
8362         return Qt;
8363
8364       if (XINT (start) < GPT && XINT (end) > GPT)
8365         {
8366           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8367             move_gap_both (XINT (start), start_byte);
8368           else
8369             move_gap_both (XINT (end), end_byte);
8370         }
8371       pos = XINT (start);
8372     }
8373
8374   list = Qnil;
8375   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8376     {
8377       elt = XCAR (tail);
8378       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8379       ASET (attrs, coding_attr_trans_tbl,
8380             get_translation_table (attrs, 1, NULL));
8381       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8382     }
8383
8384   if (STRINGP (start))
8385     p = pbeg = SDATA (start);
8386   else
8387     p = pbeg = BYTE_POS_ADDR (start_byte);
8388   pend = p + (end_byte - start_byte);
8389
8390   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8391   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8392
8393   while (p < pend)
8394     {
8395       if (ASCII_BYTE_P (*p))
8396         p++;
8397       else
8398         {
8399           c = STRING_CHAR_ADVANCE (p);
8400
8401           charset_map_loaded = 0;
8402           for (tail = list; CONSP (tail); tail = XCDR (tail))
8403             {
8404               elt = XCDR (XCAR (tail));
8405               if (! char_encodable_p (c, XCAR (elt)))
8406                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8407             }
8408           if (charset_map_loaded)
8409             {
8410               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8411
8412               if (STRINGP (start))
8413                 pbeg = SDATA (start);
8414               else
8415                 pbeg = BYTE_POS_ADDR (start_byte);
8416               p = pbeg + p_offset;
8417               pend = pbeg + pend_offset;
8418             }
8419         }
8420       pos++;
8421     }
8422
8423   tail = list;
8424   list = Qnil;
8425   for (; CONSP (tail); tail = XCDR (tail))
8426     {
8427       elt = XCAR (tail);
8428       if (CONSP (XCDR (XCDR (elt))))
8429         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8430                       list);
8431     }
8432
8433   return list;
8434 }
8435
8436
8437 Lisp_Object
8438 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8439      Lisp_Object start, end, coding_system, dst_object;
8440      int encodep, norecord;
8441 {
8442   struct coding_system coding;
8443   EMACS_INT from, from_byte, to, to_byte;
8444   Lisp_Object src_object;
8445
8446   CHECK_NUMBER_COERCE_MARKER (start);
8447   CHECK_NUMBER_COERCE_MARKER (end);
8448   if (NILP (coding_system))
8449     coding_system = Qno_conversion;
8450   else
8451     CHECK_CODING_SYSTEM (coding_system);
8452   src_object = Fcurrent_buffer ();
8453   if (NILP (dst_object))
8454     dst_object = src_object;
8455   else if (! EQ (dst_object, Qt))
8456     CHECK_BUFFER (dst_object);
8457
8458   validate_region (&start, &end);
8459   from = XFASTINT (start);
8460   from_byte = CHAR_TO_BYTE (from);
8461   to = XFASTINT (end);
8462   to_byte = CHAR_TO_BYTE (to);
8463
8464   setup_coding_system (coding_system, &coding);
8465   coding.mode |= CODING_MODE_LAST_BLOCK;
8466
8467   if (encodep)
8468     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8469                           dst_object);
8470   else
8471     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8472                           dst_object);
8473   if (! norecord)
8474     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8475
8476   return (BUFFERP (dst_object)
8477           ? make_number (coding.produced_char)
8478           : coding.dst_object);
8479 }
8480
8481
8482 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8483        3, 4, "r\nzCoding system: ",
8484        doc: /* Decode the current region from the specified coding system.
8485 When called from a program, takes four arguments:
8486         START, END, CODING-SYSTEM, and DESTINATION.
8487 START and END are buffer positions.
8488
8489 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8490 If nil, the region between START and END is replaced by the decoded text.
8491 If buffer, the decoded text is inserted in that buffer after point (point
8492 does not move).
8493 In those cases, the length of the decoded text is returned.
8494 If DESTINATION is t, the decoded text is returned.
8495
8496 This function sets `last-coding-system-used' to the precise coding system
8497 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8498 not fully specified.)  */)
8499      (start, end, coding_system, destination)
8500      Lisp_Object start, end, coding_system, destination;
8501 {
8502   return code_convert_region (start, end, coding_system, destination, 0, 0);
8503 }
8504
8505 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8506        3, 4, "r\nzCoding system: ",
8507        doc: /* Encode the current region by specified coding system.
8508 When called from a program, takes four arguments:
8509         START, END, CODING-SYSTEM and DESTINATION.
8510 START and END are buffer positions.
8511
8512 Optional 4th arguments DESTINATION specifies where the encoded text goes.
8513 If nil, the region between START and END is replace by the encoded text.
8514 If buffer, the encoded text is inserted in that buffer after point (point
8515 does not move).
8516 In those cases, the length of the encoded text is returned.
8517 If DESTINATION is t, the encoded text is returned.
8518
8519 This function sets `last-coding-system-used' to the precise coding system
8520 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8521 not fully specified.)  */)
8522   (start, end, coding_system, destination)
8523      Lisp_Object start, end, coding_system, destination;
8524 {
8525   return code_convert_region (start, end, coding_system, destination, 1, 0);
8526 }
8527
8528 Lisp_Object
8529 code_convert_string (string, coding_system, dst_object,
8530                      encodep, nocopy, norecord)
8531      Lisp_Object string, coding_system, dst_object;
8532      int encodep, nocopy, norecord;
8533 {
8534   struct coding_system coding;
8535   EMACS_INT chars, bytes;
8536
8537   CHECK_STRING (string);
8538   if (NILP (coding_system))
8539     {
8540       if (! norecord)
8541         Vlast_coding_system_used = Qno_conversion;
8542       if (NILP (dst_object))
8543         return (nocopy ? Fcopy_sequence (string) : string);
8544     }
8545
8546   if (NILP (coding_system))
8547     coding_system = Qno_conversion;
8548   else
8549     CHECK_CODING_SYSTEM (coding_system);
8550   if (NILP (dst_object))
8551     dst_object = Qt;
8552   else if (! EQ (dst_object, Qt))
8553     CHECK_BUFFER (dst_object);
8554
8555   setup_coding_system (coding_system, &coding);
8556   coding.mode |= CODING_MODE_LAST_BLOCK;
8557   chars = SCHARS (string);
8558   bytes = SBYTES (string);
8559   if (encodep)
8560     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8561   else
8562     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8563   if (! norecord)
8564     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8565
8566   return (BUFFERP (dst_object)
8567           ? make_number (coding.produced_char)
8568           : coding.dst_object);
8569 }
8570
8571
8572 /* Encode or decode STRING according to CODING_SYSTEM.
8573    Do not set Vlast_coding_system_used.
8574
8575    This function is called only from macros DECODE_FILE and
8576    ENCODE_FILE, thus we ignore character composition.  */
8577
8578 Lisp_Object
8579 code_convert_string_norecord (string, coding_system, encodep)
8580      Lisp_Object string, coding_system;
8581      int encodep;
8582 {
8583   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
8584 }
8585
8586
8587 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8588        2, 4, 0,
8589        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8590
8591 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8592 if the decoding operation is trivial.
8593
8594 Optional fourth arg BUFFER non-nil means that the decoded text is
8595 inserted in that buffer after point (point does not move).  In this
8596 case, the return value is the length of the decoded text.
8597
8598 This function sets `last-coding-system-used' to the precise coding system
8599 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8600 not fully specified.)  */)
8601   (string, coding_system, nocopy, buffer)
8602      Lisp_Object string, coding_system, nocopy, buffer;
8603 {
8604   return code_convert_string (string, coding_system, buffer,
8605                               0, ! NILP (nocopy), 0);
8606 }
8607
8608 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8609        2, 4, 0,
8610        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8611
8612 Optional third arg NOCOPY non-nil means it is OK to return STRING
8613 itself if the encoding operation is trivial.
8614
8615 Optional fourth arg BUFFER non-nil means that the encoded text is
8616 inserted in that buffer after point (point does not move).  In this
8617 case, the return value is the length of the encoded text.
8618
8619 This function sets `last-coding-system-used' to the precise coding system
8620 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8621 not fully specified.)  */)
8622      (string, coding_system, nocopy, buffer)
8623      Lisp_Object string, coding_system, nocopy, buffer;
8624 {
8625   return code_convert_string (string, coding_system, buffer,
8626                               1, ! NILP (nocopy), 1);
8627 }
8628
8629 \f
8630 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
8631        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8632 Return the corresponding character.  */)
8633      (code)
8634      Lisp_Object code;
8635 {
8636   Lisp_Object spec, attrs, val;
8637   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8638   int c;
8639
8640   CHECK_NATNUM (code);
8641   c = XFASTINT (code);
8642   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8643   attrs = AREF (spec, 0);
8644
8645   if (ASCII_BYTE_P (c)
8646       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8647     return code;
8648
8649   val = CODING_ATTR_CHARSET_LIST (attrs);
8650   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8651   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8652   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
8653
8654   if (c <= 0x7F)
8655     charset = charset_roman;
8656   else if (c >= 0xA0 && c < 0xDF)
8657     {
8658       charset = charset_kana;
8659       c -= 0x80;
8660     }
8661   else
8662     {
8663       int s1 = c >> 8, s2 = c & 0xFF;
8664
8665       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8666           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8667         error ("Invalid code: %d", code);
8668       SJIS_TO_JIS (c);
8669       charset = charset_kanji;
8670     }
8671   c = DECODE_CHAR (charset, c);
8672   if (c < 0)
8673     error ("Invalid code: %d", code);
8674   return make_number (c);
8675 }
8676
8677
8678 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8679        doc: /* Encode a Japanese character CH to shift_jis encoding.
8680 Return the corresponding code in SJIS.  */)
8681      (ch)
8682     Lisp_Object ch;
8683 {
8684   Lisp_Object spec, attrs, charset_list;
8685   int c;
8686   struct charset *charset;
8687   unsigned code;
8688
8689   CHECK_CHARACTER (ch);
8690   c = XFASTINT (ch);
8691   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8692   attrs = AREF (spec, 0);
8693
8694   if (ASCII_CHAR_P (c)
8695       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8696     return ch;
8697
8698   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8699   charset = char_charset (c, charset_list, &code);
8700   if (code == CHARSET_INVALID_CODE (charset))
8701     error ("Can't encode by shift_jis encoding: %d", c);
8702   JIS_TO_SJIS (code);
8703
8704   return make_number (code);
8705 }
8706
8707 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8708        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8709 Return the corresponding character.  */)
8710      (code)
8711      Lisp_Object code;
8712 {
8713   Lisp_Object spec, attrs, val;
8714   struct charset *charset_roman, *charset_big5, *charset;
8715   int c;
8716
8717   CHECK_NATNUM (code);
8718   c = XFASTINT (code);
8719   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8720   attrs = AREF (spec, 0);
8721
8722   if (ASCII_BYTE_P (c)
8723       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8724     return code;
8725
8726   val = CODING_ATTR_CHARSET_LIST (attrs);
8727   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8728   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8729
8730   if (c <= 0x7F)
8731     charset = charset_roman;
8732   else
8733     {
8734       int b1 = c >> 8, b2 = c & 0x7F;
8735       if (b1 < 0xA1 || b1 > 0xFE
8736           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8737         error ("Invalid code: %d", code);
8738       charset = charset_big5;
8739     }
8740   c = DECODE_CHAR (charset, (unsigned )c);
8741   if (c < 0)
8742     error ("Invalid code: %d", code);
8743   return make_number (c);
8744 }
8745
8746 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8747        doc: /* Encode the Big5 character CH to BIG5 coding system.
8748 Return the corresponding character code in Big5.  */)
8749      (ch)
8750      Lisp_Object ch;
8751 {
8752   Lisp_Object spec, attrs, charset_list;
8753   struct charset *charset;
8754   int c;
8755   unsigned code;
8756
8757   CHECK_CHARACTER (ch);
8758   c = XFASTINT (ch);
8759   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8760   attrs = AREF (spec, 0);
8761   if (ASCII_CHAR_P (c)
8762       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8763     return ch;
8764
8765   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8766   charset = char_charset (c, charset_list, &code);
8767   if (code == CHARSET_INVALID_CODE (charset))
8768     error ("Can't encode by Big5 encoding: %d", c);
8769
8770   return make_number (code);
8771 }
8772
8773 \f
8774 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
8775        Sset_terminal_coding_system_internal, 1, 2, 0,
8776        doc: /* Internal use only.  */)
8777      (coding_system, terminal)
8778      Lisp_Object coding_system;
8779      Lisp_Object terminal;
8780 {
8781   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8782   CHECK_SYMBOL (coding_system);
8783   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
8784   /* We had better not send unsafe characters to terminal.  */
8785   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
8786   /* Characer composition should be disabled.  */
8787   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8788   terminal_coding->src_multibyte = 1;
8789   terminal_coding->dst_multibyte = 0;
8790   return Qnil;
8791 }
8792
8793 DEFUN ("set-safe-terminal-coding-system-internal",
8794        Fset_safe_terminal_coding_system_internal,
8795        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8796        doc: /* Internal use only.  */)
8797      (coding_system)
8798      Lisp_Object coding_system;
8799 {
8800   CHECK_SYMBOL (coding_system);
8801   setup_coding_system (Fcheck_coding_system (coding_system),
8802                        &safe_terminal_coding);
8803   /* Characer composition should be disabled.  */
8804   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8805   safe_terminal_coding.src_multibyte = 1;
8806   safe_terminal_coding.dst_multibyte = 0;
8807   return Qnil;
8808 }
8809
8810 DEFUN ("terminal-coding-system", Fterminal_coding_system,
8811        Sterminal_coding_system, 0, 1, 0,
8812        doc: /* Return coding system specified for terminal output on the given terminal.
8813 TERMINAL may be a terminal id, a frame, or nil for the selected
8814 frame's terminal device.  */)
8815      (terminal)
8816      Lisp_Object terminal;
8817 {
8818   struct coding_system *terminal_coding
8819     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8820   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
8821
8822   /* For backward compatibility, return nil if it is `undecided'. */
8823   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
8824 }
8825
8826 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
8827        Sset_keyboard_coding_system_internal, 1, 2, 0,
8828        doc: /* Internal use only.  */)
8829      (coding_system, terminal)
8830      Lisp_Object coding_system;
8831      Lisp_Object terminal;
8832 {
       /* Initialize the keyboard-coding object of the terminal designated
          by TERMINAL from CODING_SYSTEM.  CODING_SYSTEM must be a symbol
          and must pass Fcheck_coding_system.  Always returns nil.  */
8833   struct terminal *t = get_terminal (terminal, 1);
8834   CHECK_SYMBOL (coding_system);
8835   setup_coding_system (Fcheck_coding_system (coding_system),
8836                        TERMINAL_KEYBOARD_CODING (t));
8837   /* Character composition should be disabled.  */
8838   TERMINAL_KEYBOARD_CODING (t)->common_flags
8839     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8840   return Qnil;
8841 }
8842
8843 DEFUN ("keyboard-coding-system",
8844        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
8845        doc: /* Return coding system specified for decoding keyboard input.  */)
8846      (terminal)
8847      Lisp_Object terminal;
8848 {
       /* Resolve TERMINAL with get_terminal and return the name
          registered under the id of that terminal's keyboard-coding
          object.  Unlike Fterminal_coding_system, no special case is
          made for `undecided' here.  */
8849   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8850                          (get_terminal (terminal, 1))->id);
8851 }
8852
8853 \f
8854 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8855        Sfind_operation_coding_system,  1, MANY, 0,
8856        doc: /* Choose a coding system for an operation based on the target name.
8857 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8858 DECODING-SYSTEM is the coding system to use for decoding
8859 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8860 for encoding (in case OPERATION does encoding).
8861
8862 The first argument OPERATION specifies an I/O primitive:
8863   For file I/O, `insert-file-contents' or `write-region'.
8864   For process I/O, `call-process', `call-process-region', or `start-process'.
8865   For network I/O, `open-network-stream'.
8866
8867 The remaining arguments should be the same arguments that were passed
8868 to the primitive.  Depending on which primitive, one of those arguments
8869 is selected as the TARGET.  For example, if OPERATION does file I/O,
8870 whichever argument specifies the file name is TARGET.
8871
8872 TARGET has a meaning which depends on OPERATION:
8873   For file I/O, TARGET is a file name (except for the special case below).
8874   For process I/O, TARGET is a process name.
8875   For network I/O, TARGET is a service name or a port number.
8876
8877 This function looks up what is specified for TARGET in
8878 `file-coding-system-alist', `process-coding-system-alist',
8879 or `network-coding-system-alist' depending on OPERATION.
8880 They may specify a coding system, a cons of coding systems,
8881 or a function symbol to call.
8882 In the last case, we call the function with one argument,
8883 which is a list of all the arguments given to this function.
8884 If the function can't decide a coding system, it can return
8885 `undecided' so that the normal code-detection is performed.
8886
8887 If OPERATION is `insert-file-contents', the argument corresponding to
8888 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
8889 file name to look up, and BUFFER is a buffer that contains the file's
8890 contents (not yet decoded).  If `file-coding-system-alist' specifies a
8891 function to call for FILENAME, that function should examine the
8892 contents of BUFFER instead of reading the file.
8893
8894 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
8895      (nargs, args)
8896      int nargs;
8897      Lisp_Object *args;
8898 {
8899   Lisp_Object operation, target_idx, target, val;
8900   register Lisp_Object chain;
8901
8902   if (nargs < 2)
8903     error ("Too few arguments");
8904   operation = args[0];
       /* OPERATION must be a symbol whose Qtarget_idx property is a
          non-negative integer: the position of TARGET among the
          arguments that follow OPERATION.  A negative value is rejected
          because it would make the ARGS access below read before the
          start of the array.  */
8905   if (!SYMBOLP (operation)
8906       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx))
           || XINT (target_idx) < 0)
8907     error ("Invalid first argument");
       /* TARGET is ARGS[target_idx + 1], so at least target_idx + 2
          arguments are required.  Using `<=' (not `<') keeps the index
          strictly below NARGS.  */
8908   if (nargs <= 1 + XINT (target_idx))
8909     error ("Too few arguments for operation: %s",
8910            SDATA (SYMBOL_NAME (operation)));
8911   target = args[XINT (target_idx) + 1];
       /* Accept a string for any operation, (STRING . BUFFER) for
          insert-file-contents only, and an integer for
          open-network-stream only.  */
8912   if (!(STRINGP (target)
8913         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8914             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
8915         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8916     error ("Invalid %dth argument", XINT (target_idx) + 1);
       /* For the (FILENAME . BUFFER) form only FILENAME is matched
          against the alist; the function called below still receives
          the original arguments.  */
8917   if (CONSP (target))
8918     target = XCAR (target);
8919
       /* Pick the alist to search: file, network, or (for every other
          operation) process.  */
8920   chain = ((EQ (operation, Qinsert_file_contents)
8921             || EQ (operation, Qwrite_region))
8922            ? Vfile_coding_system_alist
8923            : (EQ (operation, Qopen_network_stream)
8924               ? Vnetwork_coding_system_alist
8925               : Vprocess_coding_system_alist));
8926   if (NILP (chain))
8927     return Qnil;
8928
       /* The first matching element decides the result, even when that
          result is nil; later elements are not consulted.  */
8929   for (; CONSP (chain); chain = XCDR (chain))
8930     {
8931       Lisp_Object elt;
8932
8933       elt = XCAR (chain);
           /* A string TARGET matches when the element's car (a regexp
              string) matches it; an integer TARGET matches only an EQ
              car.  */
8934       if (CONSP (elt)
8935           && ((STRINGP (target)
8936                && STRINGP (XCAR (elt))
8937                && fast_string_match (XCAR (elt), target) >= 0)
8938               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8939         {
8940           val = XCDR (elt);
8941           /* Here, if VAL is both a valid coding system and a valid
8942              function symbol, we return VAL as a coding system.  */
8943           if (CONSP (val))
8944             return val;
8945           if (! SYMBOLP (val))
8946             return Qnil;
8947           if (! NILP (Fcoding_system_p (val)))
8948             return Fcons (val, val);
8949           if (! NILP (Ffboundp (val)))
8950             {
8951               /* We use call1 rather than safe_call1
8952                  so as to get bug reports about functions called here
8953                  which don't handle the current interface.  */
8954               val = call1 (val, Flist (nargs, args));
8955               if (CONSP (val))
8956                 return val;
8957               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8958                 return Fcons (val, val);
8959             }
8960           return Qnil;
8961         }
8962     }
8963   return Qnil;
8964 }
8965
8966 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8967        Sset_coding_system_priority, 0, MANY, 0,
8968        doc: /* Assign higher priority to the coding systems given as arguments.
8969 If multiple coding systems belong to the same category,
8970 all but the first one are ignored.
8971
8972 usage: (set-coding-system-priority &rest coding-systems)  */)
8973      (nargs, args)
8974      int nargs;
8975      Lisp_Object *args;
8976 {
8977   int i, j;
       /* CHANGED[C] is nonzero once category C has been given a place by
          one of the arguments.  PRIORITIES is the new order being
          built; it replaces `coding_priorities' at the end.  */
8978   int changed[coding_category_max];
8979   enum coding_category priorities[coding_category_max];
8980
8981   bzero (changed, sizeof changed);
8982
       /* I walks the arguments; J counts the categories placed so far.  */
8983   for (i = j = 0; i < nargs; i++)
8984     {
8985       enum coding_category category;
8986       Lisp_Object spec, attrs;
8987
8988       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8989       attrs = AREF (spec, 0);
8990       category = XINT (CODING_ATTR_CATEGORY (attrs));
8991       if (changed[category])
8992         /* Ignore this coding system because a coding system of the
8993            same category already had a higher priority.  */
8994         continue;
8995       changed[category] = 1;
8996       priorities[j++] = category;
           /* If the category's slot is already set up (id >= 0) for a
              coding system with a different name, set it up again for
              this argument.  */
8997       if (coding_categories[category].id >= 0
8998           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8999         setup_coding_system (args[i], &coding_categories[category]);
           /* Store the argument as the value of the category's entry in
              Vcoding_category_table (passed to Fset as the symbol).  */
9000       Fset (AREF (Vcoding_category_table, category), args[i]);
9001     }
9002
9003   /* Now we have decided top J priorities.  Reflect the order of the
9004      original priorities to the remaining priorities.  */
9005
       /* From here I is the next slot of PRIORITIES to fill and J scans
          the old `coding_priorities' order, skipping categories that
          were already placed above.  */
9006   for (i = j, j = 0; i < coding_category_max; i++, j++)
9007     {
9008       while (j < coding_category_max
9009              && changed[coding_priorities[j]])
9010         j++;
           /* Running out of old entries before PRIORITIES is full means
              the old order did not contain every category.  */
9011       if (j == coding_category_max)
9012         abort ();
9013       priorities[i] = coding_priorities[j];
9014     }
9015
9016   bcopy (priorities, coding_priorities, sizeof priorities);
9017
9018   /* Update `coding-category-list'.  */
       /* Consing from the lowest priority upward leaves the highest
          priority category at the head of the list.  */
9019   Vcoding_category_list = Qnil;
9020   for (i = coding_category_max - 1; i >= 0; i--)
9021     Vcoding_category_list
9022       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9023                Vcoding_category_list);
9024
9025   return Qnil;
9026 }
9027
9028 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9029        Scoding_system_priority_list, 0, 1, 0,
9030        doc: /* Return a list of coding systems ordered by their priorities.
9031 HIGHESTP non-nil means just return the highest priority one.  */)
9032      (highestp)
9033      Lisp_Object highestp;
9034 {
9035   int i;
9036   Lisp_Object val;
9037
       /* Walk the categories from highest to lowest priority.  */
9038   for (i = 0, val = Qnil; i < coding_category_max; i++)
9039     {
9040       enum coding_category category = coding_priorities[i];
9041       int id = coding_categories[category].id;
9042       Lisp_Object attrs;
9043
           /* A negative id means no coding system is set up for this
              category; it contributes nothing to the result.  */
9044       if (id < 0)
9045         continue;
9046       attrs = CODING_ID_ATTRS (id);
           /* With HIGHESTP, the first category that has a coding system
              decides the result: a single symbol, not a list.  */
9047       if (! NILP (highestp))
9048         return CODING_ATTR_BASE_NAME (attrs);
9049       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9050     }
       /* VAL was accumulated lowest-priority-last in reverse; restore
          priority order.  If no category had a coding system this
          returns nil (also when HIGHESTP is non-nil).  */
9051   return Fnreverse (val);
9052 }
9053
9054 static char *suffixes[] = { "-unix", "-dos", "-mac" };
9055
     /* Return a vector of three symbols named BASE's name followed by
        each entry of `suffixes', in that order.  BASE must be a symbol.
        The symbols are obtained with `intern'; nothing else is
        registered here.  */
9056 static Lisp_Object
9057 make_subsidiaries (base)
9058      Lisp_Object base;
9059 {
9060   Lisp_Object subsidiaries;
9061   int base_name_len = SBYTES (SYMBOL_NAME (base));
       /* Room for the base name plus the longest suffix ("-unix", five
          bytes) and the terminating NUL.  */
9062   char *buf = (char *) alloca (base_name_len + 6);
9063   int i;
9064
9065   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9066   subsidiaries = Fmake_vector (make_number (3), Qnil);
9067   for (i = 0; i < 3; i++)
9068     {
           /* Overwrite the previous suffix in place; the + 1 copies the
              suffix's NUL so BUF is a C string for `intern'.  */
9069       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9070       ASET (subsidiaries, i, intern (buf));
9071     }
9072   return subsidiaries;
9073 }
9074
9075
9076 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9077        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9078        doc: /* For internal use only.
9079 usage: (define-coding-system-internal ...)  */)
9080      (nargs, args)
9081      int nargs;
9082      Lisp_Object *args;
9083 {
9084   Lisp_Object name;
9085   Lisp_Object spec_vec;         /* [ ATTRS ALIASES EOL_TYPE ] */
9086   Lisp_Object attrs;            /* Vector of attributes.  */
9087   Lisp_Object eol_type;
9088   Lisp_Object aliases;
9089   Lisp_Object coding_type, charset_list, safe_charsets;
9090   enum coding_category category;
9091   Lisp_Object tail, val;
9092   int max_charset_id = 0;
9093   int i;
9094
       /* Every coding type needs at least the first coding_arg_max
          arguments; type-specific branches below check for more.  */
9095   if (nargs < coding_arg_max)
9096     goto short_args;
9097
9098   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9099
9100   name = args[coding_arg_name];
9101   CHECK_SYMBOL (name);
9102   CODING_ATTR_BASE_NAME (attrs) = name;
9103
       /* The mnemonic is either a string or a character.  */
9104   val = args[coding_arg_mnemonic];
9105   if (! STRINGP (val))
9106     CHECK_CHARACTER (val);
9107   CODING_ATTR_MNEMONIC (attrs) = val;
9108
9109   coding_type = args[coding_arg_coding_type];
9110   CHECK_SYMBOL (coding_type);
9111   CODING_ATTR_TYPE (attrs) = coding_type;
9112
       /* The charset list is either a symbol or a list of charsets.
          The symbols Qiso_2022 and Qemacs_mule select a predefined list
          and are only accepted for the coding type of the same name.
          In the symbol case the elements are read with XFASTINT, i.e.
          they are taken to be charset IDs already.  */
9113   charset_list = args[coding_arg_charset_list];
9114   if (SYMBOLP (charset_list))
9115     {
9116       if (EQ (charset_list, Qiso_2022))
9117         {
9118           if (! EQ (coding_type, Qiso_2022))
9119             error ("Invalid charset-list");
9120           charset_list = Viso_2022_charset_list;
9121         }
9122       else if (EQ (charset_list, Qemacs_mule))
9123         {
9124           if (! EQ (coding_type, Qemacs_mule))
9125             error ("Invalid charset-list");
9126           charset_list = Vemacs_mule_charset_list;
9127         }
9128       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9129         if (max_charset_id < XFASTINT (XCAR (tail)))
9130           max_charset_id = XFASTINT (XCAR (tail));
9131     }
9132   else
9133     {
           /* Work on a copy so the caller's list is not modified, and
              replace each charset by its numeric ID.  For iso-2022 and
              emacs-mule types, reject a charset whose ISO final or
              emacs-mule id is negative.  */
9134       charset_list = Fcopy_sequence (charset_list);
9135       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9136         {
9137           struct charset *charset;
9138
9139           val = XCAR (tail);
9140           CHECK_CHARSET_GET_CHARSET (val, charset);
9141           if (EQ (coding_type, Qiso_2022)
9142               ? CHARSET_ISO_FINAL (charset) < 0
9143               : EQ (coding_type, Qemacs_mule)
9144               ? CHARSET_EMACS_MULE_ID (charset) < 0
9145               : 0)
9146             error ("Can't handle charset `%s'",
9147                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9148
9149           XSETCAR (tail, make_number (charset->id));
9150           if (max_charset_id < charset->id)
9151             max_charset_id = charset->id;
9152         }
9153     }
9154   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9155
       /* SAFE_CHARSETS is a string indexed by charset ID: every byte
          starts as 255 and the byte of each listed charset is set to 0.  */
9156   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
9157                                 make_number (255));
9158   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9159     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9160   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9161
       /* Initial value only; several type-specific branches below
          overwrite this attribute.  */
9162   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9163
       /* Translation tables may be a char-table, a cons, or a symbol.  */
9164   val = args[coding_arg_decode_translation_table];
9165   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9166     CHECK_SYMBOL (val);
9167   CODING_ATTR_DECODE_TBL (attrs) = val;
9168
9169   val = args[coding_arg_encode_translation_table];
9170   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9171     CHECK_SYMBOL (val);
9172   CODING_ATTR_ENCODE_TBL (attrs) = val;
9173
9174   val = args[coding_arg_post_read_conversion];
9175   CHECK_SYMBOL (val);
9176   CODING_ATTR_POST_READ (attrs) = val;
9177
9178   val = args[coding_arg_pre_write_conversion];
9179   CHECK_SYMBOL (val);
9180   CODING_ATTR_PRE_WRITE (attrs) = val;
9181
       /* A nil default character means the space character.  */
9182   val = args[coding_arg_default_char];
9183   if (NILP (val))
9184     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9185   else
9186     {
9187       CHECK_CHARACTER (val);
9188       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9189     }
9190
       /* Normalize the flag to exactly nil or t.  */
9191   val = args[coding_arg_for_unibyte];
9192   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9193
9194   val = args[coding_arg_plist];
9195   CHECK_LIST (val);
9196   CODING_ATTR_PLIST (attrs) = val;
9197
       /* Type-specific attributes.  Every branch that does not signal
          an error sets CATEGORY.  */
9198   if (EQ (coding_type, Qcharset))
9199     {
9200       /* Generate a lisp vector of 256 elements.  Each element is nil,
9201          integer, or a list of charset IDs.
9202
9203          If Nth element is nil, the byte code N is invalid in this
9204          coding system.
9205
9206          If Nth element is a number NUM, N is the first byte of a
9207          charset whose ID is NUM.
9208
9209          If Nth element is a list of charset IDs, N is the first byte
9210          of one of them.  The list is sorted by dimensions of the
9211          charsets.  A charset of smaller dimension comes first. */
9212       val = Fmake_vector (make_number (256), Qnil);
9213
9214       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9215         {
9216           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9217           int dim = CHARSET_DIMENSION (charset);
               /* code_space is read in groups of four entries per
                  dimension; IDX selects the group for dimension DIM, whose
                  first two entries are used as the inclusive byte range
                  below.  */
9218           int idx = (dim - 1) * 4;
9219
               /* One ASCII-compatible charset is enough to mark the
                  whole coding system ASCII compatible.  */
9220           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9221             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9222
9223           for (i = charset->code_space[idx];
9224                i <= charset->code_space[idx + 1]; i++)
9225             {
9226               Lisp_Object tmp, tmp2;
9227               int dim2;
9228
9229               tmp = AREF (val, i);
9230               if (NILP (tmp))
9231                 tmp = XCAR (tail);
9232               else if (NUMBERP (tmp))
9233                 {
                       /* A second charset claims this byte: turn the
                          entry into a two-element list, smaller dimension
                          first (the earlier charset first on a tie).  */
9234                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9235                   if (dim < dim2)
9236                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9237                   else
9238                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9239                 }
9240               else
9241                 {
                       /* Already a list: find the first element of
                          larger dimension and insert before it, or append
                          when there is none.  */
9242                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9243                     {
9244                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9245                       if (dim < dim2)
9246                         break;
9247                     }
9248                   if (NILP (tmp2))
9249                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9250                   else
9251                     {
                           /* Insert in place: duplicate the cell at TMP2
                              behind itself, then overwrite TMP2's car
                              with the new charset ID.  */
9252                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9253                       XSETCAR (tmp2, XCAR (tail));
9254                     }
9255                 }
9256               ASET (val, i, tmp);
9257             }
9258         }
9259       ASET (attrs, coding_attr_charset_valids, val);
9260       category = coding_category_charset;
9261     }
9262   else if (EQ (coding_type, Qccl))
9263     {
9264       Lisp_Object valids;
9265
9266       if (nargs < coding_arg_ccl_max)
9267         goto short_args;
9268
           /* Decoder and encoder must be CCL programs; a vector is
              copied before being stored.  */
9269       val = args[coding_arg_ccl_decoder];
9270       CHECK_CCL_PROGRAM (val);
9271       if (VECTORP (val))
9272         val = Fcopy_sequence (val);
9273       ASET (attrs, coding_attr_ccl_decoder, val);
9274
9275       val = args[coding_arg_ccl_encoder];
9276       CHECK_CCL_PROGRAM (val);
9277       if (VECTORP (val))
9278         val = Fcopy_sequence (val);
9279       ASET (attrs, coding_attr_ccl_encoder, val);
9280
           /* VALIDS is a 256-byte string; byte N is set to 1 when N is
              named by the argument list.  Each list element is either
              an integer in 0..255 or a cons (FROM . TO) naming an
              inclusive range within 0..255 with FROM <= TO.  */
9281       val = args[coding_arg_ccl_valids];
9282       valids = Fmake_string (make_number (256), make_number (0));
9283       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9284         {
9285           int from, to;
9286
9287           val = Fcar (tail);
9288           if (INTEGERP (val))
9289             {
9290               from = to = XINT (val);
9291               if (from < 0 || from > 255)
9292                 args_out_of_range_3 (val, make_number (0), make_number (255));
9293             }
9294           else
9295             {
9296               CHECK_CONS (val);
9297               CHECK_NATNUM_CAR (val);
9298               CHECK_NATNUM_CDR (val);
9299               from = XINT (XCAR (val));
9300               if (from > 255)
9301                 args_out_of_range_3 (XCAR (val),
9302                                      make_number (0), make_number (255));
9303               to = XINT (XCDR (val));
9304               if (to < from || to > 255)
9305                 args_out_of_range_3 (XCDR (val),
9306                                      XCAR (val), make_number (255));
9307             }
9308           for (i = from; i <= to; i++)
9309             SSET (valids, i, 1);
9310         }
9311       ASET (attrs, coding_attr_ccl_valids, valids);
9312
9313       category = coding_category_ccl;
9314     }
9315   else if (EQ (coding_type, Qutf_16))
9316     {
9317       Lisp_Object bom, endian;
9318
           /* UTF-16 is never ASCII compatible, whatever was passed.  */
9319       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9320
9321       if (nargs < coding_arg_utf16_max)
9322         goto short_args;
9323
           /* BOM is nil, t, or a cons of two coding systems.  */
9324       bom = args[coding_arg_utf16_bom];
9325       if (! NILP (bom) && ! EQ (bom, Qt))
9326         {
9327           CHECK_CONS (bom);
9328           val = XCAR (bom);
9329           CHECK_CODING_SYSTEM (val);
9330           val = XCDR (bom);
9331           CHECK_CODING_SYSTEM (val);
9332         }
9333       ASET (attrs, coding_attr_utf_bom, bom);
9334
           /* A nil endian defaults to Qbig; otherwise it must be Qbig
              or Qlittle.  */
9335       endian = args[coding_arg_utf16_endian];
9336       CHECK_SYMBOL (endian);
9337       if (NILP (endian))
9338         endian = Qbig;
9339       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9340         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9341       ASET (attrs, coding_attr_utf_16_endian, endian);
9342
           /* Cons BOM -> auto; nil BOM -> the "nosig" category of the
              endian; t -> the plain be/le category of the endian.  */
9343       category = (CONSP (bom)
9344                   ? coding_category_utf_16_auto
9345                   : NILP (bom)
9346                   ? (EQ (endian, Qbig)
9347                      ? coding_category_utf_16_be_nosig
9348                      : coding_category_utf_16_le_nosig)
9349                   : (EQ (endian, Qbig)
9350                      ? coding_category_utf_16_be
9351                      : coding_category_utf_16_le));
9352     }
9353   else if (EQ (coding_type, Qiso_2022))
9354     {
9355       Lisp_Object initial, reg_usage, request, flags;
           /* This I shadows the function-level I within this branch.  */
9356       int i;
9357
9358       if (nargs < coding_arg_iso2022_max)
9359         goto short_args;
9360
           /* INITIAL is a copy of the argument vector whose first four
              elements are converted to charset IDs, or -1 for nil.  An
              ASCII-compatible charset in element 0 marks the coding
              system ASCII compatible (this may be reset below).  */
9361       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9362       CHECK_VECTOR (initial);
9363       for (i = 0; i < 4; i++)
9364         {
9365           val = Faref (initial, make_number (i));
9366           if (! NILP (val))
9367             {
9368               struct charset *charset;
9369
9370               CHECK_CHARSET_GET_CHARSET (val, charset);
9371               ASET (initial, i, make_number (CHARSET_ID (charset)));
9372               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9373                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9374             }
9375           else
9376             ASET (initial, i, make_number (-1));
9377         }
9378
           /* REG_USAGE is a cons of two numbers, stored as given.  */
9379       reg_usage = args[coding_arg_iso2022_reg_usage];
9380       CHECK_CONS (reg_usage);
9381       CHECK_NUMBER_CAR (reg_usage);
9382       CHECK_NUMBER_CDR (reg_usage);
9383
           /* REQUEST is a list of (CHARSET . REGISTER) with REGISTER in
              0..3.  Only the list spine is copied, so the XSETCAR below
              writes the charset ID into the cons cells of the caller's
              list.  */
9384       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9385       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9386         {
9387           int id;
9388           Lisp_Object tmp;
9389
9390           val = Fcar (tail);
9391           CHECK_CONS (val);
9392           tmp = XCAR (val);
9393           CHECK_CHARSET_GET_ID (tmp, id);
9394           CHECK_NATNUM_CDR (val);
9395           if (XINT (XCDR (val)) >= 4)
9396             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9397           XSETCAR (val, make_number (id));
9398         }
9399
           /* I keeps the flags exactly as passed.  When the charset list
              argument was the symbol Qiso_2022, the stored FLAGS also get
              CODING_ISO_FLAG_FULL_SUPPORT; I itself is not changed.  */
9400       flags = args[coding_arg_iso2022_flags];
9401       CHECK_NATNUM (flags);
9402       i = XINT (flags);
9403       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9404         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9405
9406       ASET (attrs, coding_attr_iso_initial, initial);
9407       ASET (attrs, coding_attr_iso_usage, reg_usage);
9408       ASET (attrs, coding_attr_iso_request, request);
9409       ASET (attrs, coding_attr_iso_flags, flags);
9410       setup_iso_safe_charsets (attrs);
9411
           /* Choose the category from the flags, from whether the
              charset list was the symbol Qiso_2022, and (8-bit case)
              from the dimension of the charset in element 1 of INITIAL.  */
9412       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9413         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9414                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9415                     ? coding_category_iso_7_else
9416                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9417                     ? coding_category_iso_7
9418                     : coding_category_iso_7_tight);
9419       else
9420         {
9421           int id = XINT (AREF (initial, 1));
9422
9423           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9424                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9425                        || id < 0)
9426                       ? coding_category_iso_8_else
9427                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9428                       ? coding_category_iso_8_1
9429                       : coding_category_iso_8_2);
9430         }
           /* Only the iso_8_1 and iso_8_2 categories keep the
              ASCII-compatible attribute; it is cleared for all others.  */
9431       if (category != coding_category_iso_8_1
9432           && category != coding_category_iso_8_2)
9433         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9434     }
9435   else if (EQ (coding_type, Qemacs_mule))
9436     {
           /* Record "full" support when the charset list argument was
              the symbol Qemacs_mule.  */
9437       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9438         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9439       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9440       category = coding_category_emacs_mule;
9441     }
9442   else if (EQ (coding_type, Qshift_jis))
9443     {
9444
9445       struct charset *charset;
9446
           /* Exactly three or four charsets are required, with
              dimensions 1, 1, 2 and (if present) 2, in that order.  The
              local CHARSET_LIST is advanced while checking; ATTRS
              already holds the head of the list.  */
9447       if (XINT (Flength (charset_list)) != 3
9448           && XINT (Flength (charset_list)) != 4)
9449         error ("There should be three or four charsets");
9450
9451       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9452       if (CHARSET_DIMENSION (charset) != 1)
9453         error ("Dimension of charset %s is not one",
9454                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9455       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9456         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9457
9458       charset_list = XCDR (charset_list);
9459       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9460       if (CHARSET_DIMENSION (charset) != 1)
9461         error ("Dimension of charset %s is not one",
9462                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9463
9464       charset_list = XCDR (charset_list);
9465       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9466       if (CHARSET_DIMENSION (charset) != 2)
9467         error ("Dimension of charset %s is not two",
9468                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9469
9470       charset_list = XCDR (charset_list);
9471       if (! NILP (charset_list))
9472         {
9473           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9474           if (CHARSET_DIMENSION (charset) != 2)
9475             error ("Dimension of charset %s is not two",
9476                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9477         }
9478
9479       category = coding_category_sjis;
           /* The most recently defined shift-jis coding system becomes
              the value of Vsjis_coding_system.  */
9480       Vsjis_coding_system = name;
9481     }
9482   else if (EQ (coding_type, Qbig5))
9483     {
9484       struct charset *charset;
9485
           /* Exactly two charsets are required: dimension 1 then
              dimension 2.  */
9486       if (XINT (Flength (charset_list)) != 2)
9487         error ("There should be just two charsets");
9488
9489       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9490       if (CHARSET_DIMENSION (charset) != 1)
9491         error ("Dimension of charset %s is not one",
9492                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9493       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9494         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9495
9496       charset_list = XCDR (charset_list);
9497       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9498       if (CHARSET_DIMENSION (charset) != 2)
9499         error ("Dimension of charset %s is not two",
9500                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9501
9502       category = coding_category_big5;
           /* The most recently defined big5 coding system becomes the
              value of Vbig5_coding_system.  */
9503       Vbig5_coding_system = name;
9504     }
9505   else if (EQ (coding_type, Qraw_text))
9506     {
9507       category = coding_category_raw_text;
9508       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9509     }
9510   else if (EQ (coding_type, Qutf_8))
9511     {
9512       Lisp_Object bom;
9513
9514       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9515
9516       if (nargs < coding_arg_utf8_max)
9517         goto short_args;
9518
           /* BOM is nil, t, or a cons of two coding systems, as for
              UTF-16 above.  */
9519       bom = args[coding_arg_utf8_bom];
9520       if (! NILP (bom) && ! EQ (bom, Qt))
9521         {
9522           CHECK_CONS (bom);
9523           val = XCAR (bom);
9524           CHECK_CODING_SYSTEM (val);
9525           val = XCDR (bom);
9526           CHECK_CODING_SYSTEM (val);
9527         }
9528       ASET (attrs, coding_attr_utf_bom, bom);
9529
9530       category = (CONSP (bom) ? coding_category_utf_8_auto
9531                   : NILP (bom) ? coding_category_utf_8_nosig
9532                   : coding_category_utf_8_sig);
9533     }
9534   else if (EQ (coding_type, Qundecided))
9535     category = coding_category_undecided;
9536   else
9537     error ("Invalid coding system type: %s",
9538            SDATA (SYMBOL_NAME (coding_type)));
9539
       /* Record the category, and prepend the category entry and the
          final ASCII-compatibility value to the property list.  */
9540   CODING_ATTR_CATEGORY (attrs) = make_number (category);
9541   CODING_ATTR_PLIST (attrs)
9542     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9543                                 CODING_ATTR_PLIST (attrs)));
9544   CODING_ATTR_PLIST (attrs)
9545     = Fcons (QCascii_compatible_p,
9546              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9547                     CODING_ATTR_PLIST (attrs)));
9548
       /* EOL-TYPE must be nil, Qunix, Qdos or Qmac.  */
9549   eol_type = args[coding_arg_eol_type];
9550   if (! NILP (eol_type)
9551       && ! EQ (eol_type, Qunix)
9552       && ! EQ (eol_type, Qdos)
9553       && ! EQ (eol_type, Qmac))
9554     error ("Invalid eol-type");
9555
9556   aliases = Fcons (name, Qnil);
9557
       /* With a nil EOL-TYPE, create the three subsidiary coding systems
          (see make_subsidiaries) and register each under its own spec
          vector.  All of them share ATTRS with the base; EOL_TYPE of the
          base becomes the vector of subsidiary names.  */
9558   if (NILP (eol_type))
9559     {
9560       eol_type = make_subsidiaries (name);
9561       for (i = 0; i < 3; i++)
9562         {
9563           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9564
9565           this_name = AREF (eol_type, i);
9566           this_aliases = Fcons (this_name, Qnil);
9567           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
               /* Element 0 keeps ATTRS from the fill value; elements 1
                  and 2 are overwritten just below.  */
9568           this_spec = Fmake_vector (make_number (3), attrs);
9569           ASET (this_spec, 1, this_aliases);
9570           ASET (this_spec, 2, this_eol_type);
9571           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9572           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
               /* Add the name to Vcoding_system_alist only if it is not
                  there yet.  */
9573           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9574           if (NILP (val))
9575             Vcoding_system_alist
9576               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9577                        Vcoding_system_alist);
9578         }
9579     }
9580
       /* Register the base coding system itself.  */
9581   spec_vec = Fmake_vector (make_number (3), attrs);
9582   ASET (spec_vec, 1, aliases);
9583   ASET (spec_vec, 2, eol_type);
9584
9585   Fputhash (name, spec_vec, Vcoding_system_hash_table);
9586   Vcoding_system_list = Fcons (name, Vcoding_system_list);
9587   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9588   if (NILP (val))
9589     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9590                                   Vcoding_system_alist);
9591
       /* (Re)initialize the category's slot in `coding_categories' from
          NAME when the slot is unset (id < 0) or already holds a coding
          system of this same name.  */
9592   {
9593     int id = coding_categories[category].id;
9594
9595     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9596       setup_coding_system (name, &coding_categories[category]);
9597   }
9598
9599   return Qnil;
9600
       /* Reached when fewer arguments were given than the coding type
          requires: signal wrong-number-of-arguments with the function
          name and the actual argument count.  */
9601  short_args:
9602   return Fsignal (Qwrong_number_of_arguments,
9603                   Fcons (intern ("define-coding-system-internal"),
9604                          make_number (nargs)));
9605 }
9606
9607
9608 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9609        3, 3, 0,
9610        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
9611   (coding_system, prop, val)
9612      Lisp_Object coding_system, prop, val;
9613 {
9614   Lisp_Object spec, attrs;
9615
9616   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9617   attrs = AREF (spec, 0);
       /* Some properties are mirrored in dedicated slots of ATTRS.  For
          those, validate VAL and update the slot; any other PROP is
          stored in the property list only.  */
9618   if (EQ (prop, QCmnemonic))
9619     {
9620       if (! STRINGP (val))
9621         CHECK_CHARACTER (val);
9622       CODING_ATTR_MNEMONIC (attrs) = val;
9623     }
9624   else if (EQ (prop, QCdefault_char))
9625     {
           /* A nil VAL is replaced by the space character, so the plist
              and the return value also carry the space character.  */
9626       if (NILP (val))
9627         val = make_number (' ');
9628       else
9629         CHECK_CHARACTER (val);
9630       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9631     }
9632   else if (EQ (prop, QCdecode_translation_table))
9633     {
9634       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9635         CHECK_SYMBOL (val);
9636       CODING_ATTR_DECODE_TBL (attrs) = val;
9637     }
9638   else if (EQ (prop, QCencode_translation_table))
9639     {
9640       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9641         CHECK_SYMBOL (val);
9642       CODING_ATTR_ENCODE_TBL (attrs) = val;
9643     }
9644   else if (EQ (prop, QCpost_read_conversion))
9645     {
9646       CHECK_SYMBOL (val);
9647       CODING_ATTR_POST_READ (attrs) = val;
9648     }
9649   else if (EQ (prop, QCpre_write_conversion))
9650     {
9651       CHECK_SYMBOL (val);
9652       CODING_ATTR_PRE_WRITE (attrs) = val;
9653     }
9654   else if (EQ (prop, QCascii_compatible_p))
9655     {
           /* Stored as given, without validation.  */
9656       CODING_ATTR_ASCII_COMPAT (attrs) = val;
9657     }
9658
       /* In every case the property list itself is updated too.  ATTRS
          is the vector held in the coding system's spec, so the change
          is made in place.  */
9659   CODING_ATTR_PLIST (attrs)
9660     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9661   return val;
9662 }
9663
9664
9665 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9666        Sdefine_coding_system_alias, 2, 2, 0,
9667        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
9668      (alias, coding_system)
9669      Lisp_Object alias, coding_system;
9670 {
9671   Lisp_Object spec, aliases, eol_type, val;
9672
9673   CHECK_SYMBOL (alias);
9674   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9675   aliases = AREF (spec, 1);
9676   /* ALIASES should be a list of length more than zero, and the first
9677      element is a base coding system.  Append ALIAS at the tail of the
9678      list.  */
       /* ALIAS is appended unconditionally; no check is made for an
          entry that is already present.  */
9679   while (!NILP (XCDR (aliases)))
9680     aliases = XCDR (aliases);
9681   XSETCDR (aliases, Fcons (alias, Qnil));
9682
       /* When the EOL type is a vector (of subsidiary coding systems),
          also define ALIAS-unix, ALIAS-dos and ALIAS-mac as aliases of
          the corresponding subsidiaries, by recursion.  */
9683   eol_type = AREF (spec, 2);
9684   if (VECTORP (eol_type))
9685     {
9686       Lisp_Object subsidiaries;
9687       int i;
9688
9689       subsidiaries = make_subsidiaries (alias);
9690       for (i = 0; i < 3; i++)
9691         Fdefine_coding_system_alias (AREF (subsidiaries, i),
9692                                      AREF (eol_type, i));
9693     }
9694
       /* ALIAS maps to the very same spec vector as CODING-SYSTEM, so
          both names share attributes, alias list and EOL type.  */
9695   Fputhash (alias, spec, Vcoding_system_hash_table);
9696   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
       /* Add the name to Vcoding_system_alist only if it is not there
          yet.  */
9697   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9698   if (NILP (val))
9699     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9700                                   Vcoding_system_alist);
9701
9702   return Qnil;
9703 }
9704
9705 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9706        1, 1, 0,
9707        doc: /* Return the base of CODING-SYSTEM.
9708 Any alias or subsidiary coding system is not a base coding system.  */)
9709   (coding_system)
9710      Lisp_Object coding_system;
9711 {
9712   Lisp_Object spec, attrs;
9713
9714   if (NILP (coding_system))
9715     return (Qno_conversion);
9716   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9717   attrs = AREF (spec, 0);
9718   return CODING_ATTR_BASE_NAME (attrs);
9719 }
9720
9721 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9722        1, 1, 0,
9723        doc: "Return the property list of CODING-SYSTEM.")
9724      (coding_system)
9725      Lisp_Object coding_system;
9726 {
9727   Lisp_Object spec, attrs;
9728
9729   if (NILP (coding_system))
9730     coding_system = Qno_conversion;
9731   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9732   attrs = AREF (spec, 0);
9733   return CODING_ATTR_PLIST (attrs);
9734 }
9735
9736
9737 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9738        1, 1, 0,
9739        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
9740      (coding_system)
9741      Lisp_Object coding_system;
9742 {
9743   Lisp_Object spec;
9744
9745   if (NILP (coding_system))
9746     coding_system = Qno_conversion;
9747   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9748   return AREF (spec, 1);
9749 }
9750
9751 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9752        Scoding_system_eol_type, 1, 1, 0,
9753        doc: /* Return eol-type of CODING-SYSTEM.
9754 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
9755
9756 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9757 and CR respectively.
9758
9759 A vector value indicates that a format of end-of-line should be
9760 detected automatically.  Nth element of the vector is the subsidiary
9761 coding system whose eol-type is N.  */)
9762      (coding_system)
9763      Lisp_Object coding_system;
9764 {
9765   Lisp_Object spec, eol_type;
9766   int n;
9767
9768   if (NILP (coding_system))
9769     coding_system = Qno_conversion;
9770   if (! CODING_SYSTEM_P (coding_system))
9771     return Qnil;
9772   spec = CODING_SYSTEM_SPEC (coding_system);
9773   eol_type = AREF (spec, 2);
9774   if (VECTORP (eol_type))
9775     return Fcopy_sequence (eol_type);
9776   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9777   return make_number (n);
9778 }
9779
9780 #endif /* emacs */
9781
9782 \f
/*** 12. Postamble ***/
9784
9785 void
9786 init_coding_once ()
9787 {
9788   int i;
9789
9790   for (i = 0; i < coding_category_max; i++)
9791     {
9792       coding_categories[i].id = -1;
9793       coding_priorities[i] = i;
9794     }
9795
9796   /* ISO2022 specific initialize routine.  */
9797   for (i = 0; i < 0x20; i++)
9798     iso_code_class[i] = ISO_control_0;
9799   for (i = 0x21; i < 0x7F; i++)
9800     iso_code_class[i] = ISO_graphic_plane_0;
9801   for (i = 0x80; i < 0xA0; i++)
9802     iso_code_class[i] = ISO_control_1;
9803   for (i = 0xA1; i < 0xFF; i++)
9804     iso_code_class[i] = ISO_graphic_plane_1;
9805   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9806   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9807   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9808   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9809   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9810   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9811   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9812   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9813   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9814
9815   for (i = 0; i < 256; i++)
9816     {
9817       emacs_mule_bytes[i] = 1;
9818     }
9819   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9820   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9821   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9822   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9823 }
9824
9825 #ifdef emacs
9826
9827 void
9828 syms_of_coding ()
9829 {
9830   staticpro (&Vcoding_system_hash_table);
9831   {
9832     Lisp_Object args[2];
9833     args[0] = QCtest;
9834     args[1] = Qeq;
9835     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9836   }
9837
9838   staticpro (&Vsjis_coding_system);
9839   Vsjis_coding_system = Qnil;
9840
9841   staticpro (&Vbig5_coding_system);
9842   Vbig5_coding_system = Qnil;
9843
9844   staticpro (&Vcode_conversion_reused_workbuf);
9845   Vcode_conversion_reused_workbuf = Qnil;
9846
9847   staticpro (&Vcode_conversion_workbuf_name);
9848   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9849
9850   reused_workbuf_in_use = 0;
9851
9852   DEFSYM (Qcharset, "charset");
9853   DEFSYM (Qtarget_idx, "target-idx");
9854   DEFSYM (Qcoding_system_history, "coding-system-history");
9855   Fset (Qcoding_system_history, Qnil);
9856
9857   /* Target FILENAME is the first argument.  */
9858   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9859   /* Target FILENAME is the third argument.  */
9860   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9861
9862   DEFSYM (Qcall_process, "call-process");
9863   /* Target PROGRAM is the first argument.  */
9864   Fput (Qcall_process, Qtarget_idx, make_number (0));
9865
9866   DEFSYM (Qcall_process_region, "call-process-region");
9867   /* Target PROGRAM is the third argument.  */
9868   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9869
9870   DEFSYM (Qstart_process, "start-process");
9871   /* Target PROGRAM is the third argument.  */
9872   Fput (Qstart_process, Qtarget_idx, make_number (2));
9873
9874   DEFSYM (Qopen_network_stream, "open-network-stream");
9875   /* Target SERVICE is the fourth argument.  */
9876   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9877
9878   DEFSYM (Qcoding_system, "coding-system");
9879   DEFSYM (Qcoding_aliases, "coding-aliases");
9880
9881   DEFSYM (Qeol_type, "eol-type");
9882   DEFSYM (Qunix, "unix");
9883   DEFSYM (Qdos, "dos");
9884
9885   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9886   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9887   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9888   DEFSYM (Qdefault_char, "default-char");
9889   DEFSYM (Qundecided, "undecided");
9890   DEFSYM (Qno_conversion, "no-conversion");
9891   DEFSYM (Qraw_text, "raw-text");
9892
9893   DEFSYM (Qiso_2022, "iso-2022");
9894
9895   DEFSYM (Qutf_8, "utf-8");
9896   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9897
9898   DEFSYM (Qutf_16, "utf-16");
9899   DEFSYM (Qbig, "big");
9900   DEFSYM (Qlittle, "little");
9901
9902   DEFSYM (Qshift_jis, "shift-jis");
9903   DEFSYM (Qbig5, "big5");
9904
9905   DEFSYM (Qcoding_system_p, "coding-system-p");
9906
9907   DEFSYM (Qcoding_system_error, "coding-system-error");
9908   Fput (Qcoding_system_error, Qerror_conditions,
9909         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9910   Fput (Qcoding_system_error, Qerror_message,
9911         build_string ("Invalid coding system"));
9912
9913   /* Intern this now in case it isn't already done.
9914      Setting this variable twice is harmless.
9915      But don't staticpro it here--that is done in alloc.c.  */
9916   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9917
9918   DEFSYM (Qtranslation_table, "translation-table");
9919   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9920   DEFSYM (Qtranslation_table_id, "translation-table-id");
9921   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9922   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9923
9924   DEFSYM (Qvalid_codes, "valid-codes");
9925
9926   DEFSYM (Qemacs_mule, "emacs-mule");
9927
9928   DEFSYM (QCcategory, ":category");
9929   DEFSYM (QCmnemonic, ":mnemonic");
9930   DEFSYM (QCdefault_char, ":default-char");
9931   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9932   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9933   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9934   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9935   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
9936
9937   Vcoding_category_table
9938     = Fmake_vector (make_number (coding_category_max), Qnil);
9939   staticpro (&Vcoding_category_table);
9940   /* Followings are target of code detection.  */
9941   ASET (Vcoding_category_table, coding_category_iso_7,
9942         intern ("coding-category-iso-7"));
9943   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9944         intern ("coding-category-iso-7-tight"));
9945   ASET (Vcoding_category_table, coding_category_iso_8_1,
9946         intern ("coding-category-iso-8-1"));
9947   ASET (Vcoding_category_table, coding_category_iso_8_2,
9948         intern ("coding-category-iso-8-2"));
9949   ASET (Vcoding_category_table, coding_category_iso_7_else,
9950         intern ("coding-category-iso-7-else"));
9951   ASET (Vcoding_category_table, coding_category_iso_8_else,
9952         intern ("coding-category-iso-8-else"));
9953   ASET (Vcoding_category_table, coding_category_utf_8_auto,
9954         intern ("coding-category-utf-8-auto"));
9955   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
9956         intern ("coding-category-utf-8"));
9957   ASET (Vcoding_category_table, coding_category_utf_8_sig,
9958         intern ("coding-category-utf-8-sig"));
9959   ASET (Vcoding_category_table, coding_category_utf_16_be,
9960         intern ("coding-category-utf-16-be"));
9961   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9962         intern ("coding-category-utf-16-auto"));
9963   ASET (Vcoding_category_table, coding_category_utf_16_le,
9964         intern ("coding-category-utf-16-le"));
9965   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9966         intern ("coding-category-utf-16-be-nosig"));
9967   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9968         intern ("coding-category-utf-16-le-nosig"));
9969   ASET (Vcoding_category_table, coding_category_charset,
9970         intern ("coding-category-charset"));
9971   ASET (Vcoding_category_table, coding_category_sjis,
9972         intern ("coding-category-sjis"));
9973   ASET (Vcoding_category_table, coding_category_big5,
9974         intern ("coding-category-big5"));
9975   ASET (Vcoding_category_table, coding_category_ccl,
9976         intern ("coding-category-ccl"));
9977   ASET (Vcoding_category_table, coding_category_emacs_mule,
9978         intern ("coding-category-emacs-mule"));
9979   /* Followings are NOT target of code detection.  */
9980   ASET (Vcoding_category_table, coding_category_raw_text,
9981         intern ("coding-category-raw-text"));
9982   ASET (Vcoding_category_table, coding_category_undecided,
9983         intern ("coding-category-undecided"));
9984
9985   DEFSYM (Qinsufficient_source, "insufficient-source");
9986   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9987   DEFSYM (Qinvalid_source, "invalid-source");
9988   DEFSYM (Qinterrupted, "interrupted");
9989   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9990   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
9991
9992   defsubr (&Scoding_system_p);
9993   defsubr (&Sread_coding_system);
9994   defsubr (&Sread_non_nil_coding_system);
9995   defsubr (&Scheck_coding_system);
9996   defsubr (&Sdetect_coding_region);
9997   defsubr (&Sdetect_coding_string);
9998   defsubr (&Sfind_coding_systems_region_internal);
9999   defsubr (&Sunencodable_char_position);
10000   defsubr (&Scheck_coding_systems_region);
10001   defsubr (&Sdecode_coding_region);
10002   defsubr (&Sencode_coding_region);
10003   defsubr (&Sdecode_coding_string);
10004   defsubr (&Sencode_coding_string);
10005   defsubr (&Sdecode_sjis_char);
10006   defsubr (&Sencode_sjis_char);
10007   defsubr (&Sdecode_big5_char);
10008   defsubr (&Sencode_big5_char);
10009   defsubr (&Sset_terminal_coding_system_internal);
10010   defsubr (&Sset_safe_terminal_coding_system_internal);
10011   defsubr (&Sterminal_coding_system);
10012   defsubr (&Sset_keyboard_coding_system_internal);
10013   defsubr (&Skeyboard_coding_system);
10014   defsubr (&Sfind_operation_coding_system);
10015   defsubr (&Sset_coding_system_priority);
10016   defsubr (&Sdefine_coding_system_internal);
10017   defsubr (&Sdefine_coding_system_alias);
10018   defsubr (&Scoding_system_put);
10019   defsubr (&Scoding_system_base);
10020   defsubr (&Scoding_system_plist);
10021   defsubr (&Scoding_system_aliases);
10022   defsubr (&Scoding_system_eol_type);
10023   defsubr (&Scoding_system_priority_list);
10024
10025   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10026                doc: /* List of coding systems.
10027
10028 Do not alter the value of this variable manually.  This variable should be
10029 updated by the functions `define-coding-system' and
10030 `define-coding-system-alias'.  */);
10031   Vcoding_system_list = Qnil;
10032
10033   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10034                doc: /* Alist of coding system names.
10035 Each element is one element list of coding system name.
10036 This variable is given to `completing-read' as COLLECTION argument.
10037
10038 Do not alter the value of this variable manually.  This variable should be
10039 updated by the functions `make-coding-system' and
10040 `define-coding-system-alias'.  */);
10041   Vcoding_system_alist = Qnil;
10042
10043   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10044                doc: /* List of coding-categories (symbols) ordered by priority.
10045
10046 On detecting a coding system, Emacs tries code detection algorithms
10047 associated with each coding-category one by one in this order.  When
10048 one algorithm agrees with a byte sequence of source text, the coding
10049 system bound to the corresponding coding-category is selected.
10050
10051 Don't modify this variable directly, but use `set-coding-priority'.  */);
10052   {
10053     int i;
10054
10055     Vcoding_category_list = Qnil;
10056     for (i = coding_category_max - 1; i >= 0; i--)
10057       Vcoding_category_list
10058         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10059                  Vcoding_category_list);
10060   }
10061
10062   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10063                doc: /* Specify the coding system for read operations.
10064 It is useful to bind this variable with `let', but do not set it globally.
10065 If the value is a coding system, it is used for decoding on read operation.
10066 If not, an appropriate element is used from one of the coding system alists.
10067 There are three such tables: `file-coding-system-alist',
10068 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10069   Vcoding_system_for_read = Qnil;
10070
10071   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10072                doc: /* Specify the coding system for write operations.
10073 Programs bind this variable with `let', but you should not set it globally.
10074 If the value is a coding system, it is used for encoding of output,
10075 when writing it to a file and when sending it to a file or subprocess.
10076
10077 If this does not specify a coding system, an appropriate element
10078 is used from one of the coding system alists.
10079 There are three such tables: `file-coding-system-alist',
10080 `process-coding-system-alist', and `network-coding-system-alist'.
10081 For output to files, if the above procedure does not specify a coding system,
10082 the value of `buffer-file-coding-system' is used.  */);
10083   Vcoding_system_for_write = Qnil;
10084
10085   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10086                doc: /*
10087 Coding system used in the latest file or process I/O.  */);
10088   Vlast_coding_system_used = Qnil;
10089
10090   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10091                doc: /*
10092 Error status of the last code conversion.
10093
10094 When an error was detected in the last code conversion, this variable
10095 is set to one of the following symbols.
10096   `insufficient-source'
10097   `inconsistent-eol'
10098   `invalid-source'
10099   `interrupted'
10100   `insufficient-memory'
10101 When no error was detected, the value doesn't change.  So, to check
10102 the error status of a code conversion by this variable, you must
10103 explicitly set this variable to nil before performing code
10104 conversion.  */);
10105   Vlast_code_conversion_error = Qnil;
10106
10107   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10108                doc: /*
10109 *Non-nil means always inhibit code conversion of end-of-line format.
10110 See info node `Coding Systems' and info node `Text and Binary' concerning
10111 such conversion.  */);
10112   inhibit_eol_conversion = 0;
10113
10114   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10115                doc: /*
10116 Non-nil means process buffer inherits coding system of process output.
10117 Bind it to t if the process output is to be treated as if it were a file
10118 read from some filesystem.  */);
10119   inherit_process_coding_system = 0;
10120
10121   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10122                doc: /*
10123 Alist to decide a coding system to use for a file I/O operation.
10124 The format is ((PATTERN . VAL) ...),
10125 where PATTERN is a regular expression matching a file name,
10126 VAL is a coding system, a cons of coding systems, or a function symbol.
10127 If VAL is a coding system, it is used for both decoding and encoding
10128 the file contents.
10129 If VAL is a cons of coding systems, the car part is used for decoding,
10130 and the cdr part is used for encoding.
10131 If VAL is a function symbol, the function must return a coding system
10132 or a cons of coding systems which are used as above.  The function is
10133 called with an argument that is a list of the arguments with which
10134 `find-operation-coding-system' was called.  If the function can't decide
10135 a coding system, it can return `undecided' so that the normal
10136 code-detection is performed.
10137
10138 See also the function `find-operation-coding-system'
10139 and the variable `auto-coding-alist'.  */);
10140   Vfile_coding_system_alist = Qnil;
10141
10142   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10143                doc: /*
10144 Alist to decide a coding system to use for a process I/O operation.
10145 The format is ((PATTERN . VAL) ...),
10146 where PATTERN is a regular expression matching a program name,
10147 VAL is a coding system, a cons of coding systems, or a function symbol.
10148 If VAL is a coding system, it is used for both decoding what received
10149 from the program and encoding what sent to the program.
10150 If VAL is a cons of coding systems, the car part is used for decoding,
10151 and the cdr part is used for encoding.
10152 If VAL is a function symbol, the function must return a coding system
10153 or a cons of coding systems which are used as above.
10154
10155 See also the function `find-operation-coding-system'.  */);
10156   Vprocess_coding_system_alist = Qnil;
10157
10158   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10159                doc: /*
10160 Alist to decide a coding system to use for a network I/O operation.
10161 The format is ((PATTERN . VAL) ...),
10162 where PATTERN is a regular expression matching a network service name
10163 or is a port number to connect to,
10164 VAL is a coding system, a cons of coding systems, or a function symbol.
10165 If VAL is a coding system, it is used for both decoding what received
10166 from the network stream and encoding what sent to the network stream.
10167 If VAL is a cons of coding systems, the car part is used for decoding,
10168 and the cdr part is used for encoding.
10169 If VAL is a function symbol, the function must return a coding system
10170 or a cons of coding systems which are used as above.
10171
10172 See also the function `find-operation-coding-system'.  */);
10173   Vnetwork_coding_system_alist = Qnil;
10174
10175   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10176                doc: /* Coding system to use with system messages.
10177 Also used for decoding keyboard input on X Window system.  */);
10178   Vlocale_coding_system = Qnil;
10179
10180   /* The eol mnemonics are reset in startup.el system-dependently.  */
10181   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10182                doc: /*
10183 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10184   eol_mnemonic_unix = build_string (":");
10185
10186   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10187                doc: /*
10188 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10189   eol_mnemonic_dos = build_string ("\\");
10190
10191   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10192                doc: /*
10193 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10194   eol_mnemonic_mac = build_string ("/");
10195
10196   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10197                doc: /*
10198 *String displayed in mode line when end-of-line format is not yet determined.  */);
10199   eol_mnemonic_undecided = build_string (":");
10200
10201   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10202                doc: /*
10203 *Non-nil enables character translation while encoding and decoding.  */);
10204   Venable_character_translation = Qt;
10205
10206   DEFVAR_LISP ("standard-translation-table-for-decode",
10207                &Vstandard_translation_table_for_decode,
10208                doc: /* Table for translating characters while decoding.  */);
10209   Vstandard_translation_table_for_decode = Qnil;
10210
10211   DEFVAR_LISP ("standard-translation-table-for-encode",
10212                &Vstandard_translation_table_for_encode,
10213                doc: /* Table for translating characters while encoding.  */);
10214   Vstandard_translation_table_for_encode = Qnil;
10215
10216   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10217                doc: /* Alist of charsets vs revision numbers.
10218 While encoding, if a charset (car part of an element) is found,
10219 designate it with the escape sequence identifying revision (cdr part
10220 of the element).  */);
10221   Vcharset_revision_table = Qnil;
10222
10223   DEFVAR_LISP ("default-process-coding-system",
10224                &Vdefault_process_coding_system,
10225                doc: /* Cons of coding systems used for process I/O by default.
10226 The car part is used for decoding a process output,
10227 the cdr part is used for encoding a text to be sent to a process.  */);
10228   Vdefault_process_coding_system = Qnil;
10229
10230   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10231                doc: /*
10232 Table of extra Latin codes in the range 128..159 (inclusive).
10233 This is a vector of length 256.
10234 If Nth element is non-nil, the existence of code N in a file
10235 \(or output of subprocess) doesn't prevent it to be detected as
10236 a coding system of ISO 2022 variant which has a flag
10237 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10238 or reading output of a subprocess.
10239 Only 128th through 159th elements have a meaning.  */);
10240   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10241
10242   DEFVAR_LISP ("select-safe-coding-system-function",
10243                &Vselect_safe_coding_system_function,
10244                doc: /*
10245 Function to call to select safe coding system for encoding a text.
10246
10247 If set, this function is called to force a user to select a proper
10248 coding system which can encode the text in the case that a default
10249 coding system used in each operation can't encode the text.  The
10250 function should take care that the buffer is not modified while
10251 the coding system is being selected.
10252
10253 The default value is `select-safe-coding-system' (which see).  */);
10254   Vselect_safe_coding_system_function = Qnil;
10255
10256   DEFVAR_BOOL ("coding-system-require-warning",
10257                &coding_system_require_warning,
10258                doc: /* Internal use only.
10259 If non-nil, on writing a file, `select-safe-coding-system-function' is
10260 called even if `coding-system-for-write' is non-nil.  The command
10261 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10262   coding_system_require_warning = 0;
10263
10264
10265   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10266                &inhibit_iso_escape_detection,
10267                doc: /*
10268 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
10269
10270 By default, on reading a file, Emacs tries to detect how the text is
10271 encoded.  This code detection is sensitive to escape sequences.  If
10272 the sequence is valid as ISO2022, the code is determined as one of
10273 the ISO2022 encodings, and the file is decoded by the corresponding
10274 coding system (e.g. `iso-2022-7bit').
10275
10276 However, there may be a case that you want to read escape sequences in
10277 a file as is.  In such a case, you can set this variable to non-nil.
10278 Then, as the code detection ignores any escape sequences, no file is
10279 detected as encoded in some ISO2022 encoding.  The result is that all
10280 escape sequences become visible in a buffer.
10281
10282 The default value is nil, and it is strongly recommended not to change
10283 it.  That is because many Emacs Lisp source files that contain
10284 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10285 in Emacs's distribution, and they won't be decoded correctly on
10286 reading if you suppress escape sequence detection.
10287
10288 The other way to read escape sequences in a file without decoding is
10289 to explicitly specify some coding system that doesn't use ISO2022's
10290 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10291   inhibit_iso_escape_detection = 0;
10292
10293   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10294                doc: /* Char table for translating self-inserting characters.
10295 This is applied to the result of input methods, not their input.
10296 See also `keyboard-translate-table'.  */);
10297     Vtranslation_table_for_input = Qnil;
10298
10299   {
10300     Lisp_Object args[coding_arg_max];
10301     Lisp_Object plist[16];
10302     int i;
10303
10304     for (i = 0; i < coding_arg_max; i++)
10305       args[i] = Qnil;
10306
10307     plist[0] = intern (":name");
10308     plist[1] = args[coding_arg_name] = Qno_conversion;
10309     plist[2] = intern (":mnemonic");
10310     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10311     plist[4] = intern (":coding-type");
10312     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10313     plist[6] = intern (":ascii-compatible-p");
10314     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10315     plist[8] = intern (":default-char");
10316     plist[9] = args[coding_arg_default_char] = make_number (0);
10317     plist[10] = intern (":for-unibyte");
10318     plist[11] = args[coding_arg_for_unibyte] = Qt;
10319     plist[12] = intern (":docstring");
10320     plist[13] = build_string ("Do no conversion.\n\
10321 \n\
10322 When you visit a file with this coding, the file is read into a\n\
10323 unibyte buffer as is, thus each byte of a file is treated as a\n\
10324 character.");
10325     plist[14] = intern (":eol-type");
10326     plist[15] = args[coding_arg_eol_type] = Qunix;
10327     args[coding_arg_plist] = Flist (16, plist);
10328     Fdefine_coding_system_internal (coding_arg_max, args);
10329
10330     plist[1] = args[coding_arg_name] = Qundecided;
10331     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10332     plist[5] = args[coding_arg_coding_type] = Qundecided;
10333     /* This is already set.
10334        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10335     plist[8] = intern (":charset-list");
10336     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10337     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10338     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10339     plist[15] = args[coding_arg_eol_type] = Qnil;
10340     args[coding_arg_plist] = Flist (16, plist);
10341     Fdefine_coding_system_internal (coding_arg_max, args);
10342   }
10343
10344   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10345
10346   {
10347     int i;
10348
10349     for (i = 0; i < coding_category_max; i++)
10350       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10351   }
10352 #if defined (MSDOS) || defined (WINDOWSNT)
10353   system_eol_type = Qdos;
10354 #else
10355   system_eol_type = Qunix;
10356 #endif
10357   staticpro (&system_eol_type);
10358 }
10359
10360 char *
10361 emacs_strerror (error_number)
10362      int error_number;
10363 {
10364   char *str;
10365
10366   synchronize_system_messages_locale ();
10367   str = strerror (error_number);
10368
10369   if (! NILP (Vlocale_coding_system))
10370     {
10371       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10372                                                       Vlocale_coding_system,
10373                                                       0);
10374       str = (char *) SDATA (dec);
10375     }
10376
10377   return str;
10378 }
10379
10380 #endif /* emacs */
10381
10382 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10383    (do not change this comment) */