src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcharset_hash_table.  The conversion from
  62   coding system symbol to attributes vector is done by looking up
  63   Vcharset_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we setup a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;
 166   ...;
 167
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the souce is exausted, jump
 171          to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (! __C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source exausted successfully.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size;
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source code, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf < charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that source area and destination area are
 258   overlapped, which means that we can produce a encoded text until it
 259   reaches at the head of not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292 #include <setjmp.h>
 293
 294 #include "lisp.h"
 295 #include "buffer.h"
 296 #include "character.h"
 297 #include "charset.h"
 298 #include "ccl.h"
 299 #include "composite.h"
 300 #include "coding.h"
 301 #include "window.h"
 302 #include "frame.h"
 303 #include "termhooks.h"
 304
 305 Lisp_Object Vcoding_system_hash_table;
 306
 307 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 308 Lisp_Object Qunix, Qdos;
 309 extern Lisp_Object Qmac;        /* frame.c */
 310 Lisp_Object Qbuffer_file_coding_system;
 311 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 312 Lisp_Object Qdefault_char;
 313 Lisp_Object Qno_conversion, Qundecided;
 314 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 315 Lisp_Object Qbig, Qlittle;
 316 Lisp_Object Qcoding_system_history;
 317 Lisp_Object Qvalid_codes;
 318 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 319 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 320 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 321 Lisp_Object QCascii_compatible_p;
 322
 323 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 324 Lisp_Object Qcall_process, Qcall_process_region;
 325 Lisp_Object Qstart_process, Qopen_network_stream;
 326 Lisp_Object Qtarget_idx;
 327
 328 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 329 Lisp_Object Qinterrupted, Qinsufficient_memory;
 330
 331 extern Lisp_Object Qcompletion_ignore_case;
 332
 333 /* If a symbol has this property, evaluate the value to define the
 334    symbol as a coding system.  */
 335 static Lisp_Object Qcoding_system_define_form;
 336
 337 int coding_system_require_warning;
 338
 339 Lisp_Object Vselect_safe_coding_system_function;
 340
 341 /* Mnemonic string for each format of end-of-line.  */
 342 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 343 /* Mnemonic string to indicate format of end-of-line is not yet
 344    decided.  */
 345 Lisp_Object eol_mnemonic_undecided;
 346
 347 /* Format of end-of-line decided by system.  This is Qunix on
 348    Unix and Mac, Qdos on DOS/Windows.
 349    This has an effect only for external encoding (i.e. for output to
 350    file and process), not for in-buffer or Lisp string encoding.  */
 351 static Lisp_Object system_eol_type;
 352
 353 #ifdef emacs
 354
 355 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 356
 357 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 358
 359 /* Coding system emacs-mule and raw-text are for converting only
 360    end-of-line format.  */
 361 Lisp_Object Qemacs_mule, Qraw_text;
 362 Lisp_Object Qutf_8_emacs;
 363
 364 /* Coding-systems are handed between Emacs Lisp programs and C internal
 365    routines by the following three variables.  */
 366 /* Coding-system for reading files and receiving data from process.  */
 367 Lisp_Object Vcoding_system_for_read;
 368 /* Coding-system for writing files and sending data to process.  */
 369 Lisp_Object Vcoding_system_for_write;
 370 /* Coding-system actually used in the latest I/O.  */
 371 Lisp_Object Vlast_coding_system_used;
 372 /* Set to non-nil when an error is detected while code conversion.  */
 373 Lisp_Object Vlast_code_conversion_error;
 374 /* A vector of length 256 which contains information about special
 375    Latin codes (especially for dealing with Microsoft codes).  */
 376 Lisp_Object Vlatin_extra_code_table;
 377
 378 /* Flag to inhibit code conversion of end-of-line format.  */
 379 int inhibit_eol_conversion;
 380
 381 /* Flag to inhibit ISO2022 escape sequence detection.  */
 382 int inhibit_iso_escape_detection;
 383
 384 /* Flag to inhibit detection of binary files through null bytes.  */
 385 int inhibit_null_byte_detection;
 386
 387 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 388 int inherit_process_coding_system;
 389
 390 /* Coding system to be used to encode text for terminal display when
 391    terminal coding system is nil.  */
 392 struct coding_system safe_terminal_coding;
 393
 394 Lisp_Object Vfile_coding_system_alist;
 395 Lisp_Object Vprocess_coding_system_alist;
 396 Lisp_Object Vnetwork_coding_system_alist;
 397
 398 Lisp_Object Vlocale_coding_system;
 399
 400 #endif /* emacs */
 401
 402 /* Flag to tell if we look up translation table on character code
 403    conversion.  */
 404 Lisp_Object Venable_character_translation;
 405 /* Standard translation table to look up on decoding (reading).  */
 406 Lisp_Object Vstandard_translation_table_for_decode;
 407 /* Standard translation table to look up on encoding (writing).  */
 408 Lisp_Object Vstandard_translation_table_for_encode;
 409
 410 Lisp_Object Qtranslation_table;
 411 Lisp_Object Qtranslation_table_id;
 412 Lisp_Object Qtranslation_table_for_decode;
 413 Lisp_Object Qtranslation_table_for_encode;
 414
 415 /* Alist of charsets vs revision number.  */
 416 static Lisp_Object Vcharset_revision_table;
 417
 418 /* Default coding systems used for process I/O.  */
 419 Lisp_Object Vdefault_process_coding_system;
 420
 421 /* Char table for translating Quail and self-inserting input.  */
 422 Lisp_Object Vtranslation_table_for_input;
 423
 424 /* Two special coding systems.  */
 425 Lisp_Object Vsjis_coding_system;
 426 Lisp_Object Vbig5_coding_system;
 427
 428 /* ISO2022 section */
 429
 430 #define CODING_ISO_INITIAL(coding, reg)                 \
 431   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 432                      coding_attr_iso_initial),          \
 433                reg)))
 434
 435
 436 #define CODING_ISO_REQUEST(coding, charset_id)          \
 437   (((charset_id) <= (coding)->max_charset_id            \
 438     ? ((coding)->safe_charsets[charset_id] != 255       \
 439        ? (coding)->safe_charsets[charset_id]            \
 440        : -1)                                            \
 441     : -1))
 442
 443
 444 #define CODING_ISO_FLAGS(coding)        \
 445   ((coding)->spec.iso_2022.flags)
 446 #define CODING_ISO_DESIGNATION(coding, reg)     \
 447   ((coding)->spec.iso_2022.current_designation[reg])
 448 #define CODING_ISO_INVOCATION(coding, plane)    \
 449   ((coding)->spec.iso_2022.current_invocation[plane])
 450 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 451   ((coding)->spec.iso_2022.single_shifting)
 452 #define CODING_ISO_BOL(coding)  \
 453   ((coding)->spec.iso_2022.bol)
 454 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 455   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 456 #define CODING_ISO_CMP_STATUS(coding)   \
 457   (&(coding)->spec.iso_2022.cmp_status)
 458 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 459   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 460 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 461   ((coding)->spec.iso_2022.embedded_utf_8)
 462
 463 /* Control characters of ISO2022.  */
 464                         /* code */      /* function */
 465 #define ISO_CODE_LF     0x0A            /* line-feed */
 466 #define ISO_CODE_CR     0x0D            /* carriage-return */
 467 #define ISO_CODE_SO     0x0E            /* shift-out */
 468 #define ISO_CODE_SI     0x0F            /* shift-in */
 469 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 470 #define ISO_CODE_ESC    0x1B            /* escape */
 471 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 472 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 473 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 474
 475 /* All code (1-byte) of ISO2022 is classified into one of the
 476    followings.  */
 477 enum iso_code_class_type
 478   {
 479     ISO_control_0,              /* Control codes in the range
 480                                    0x00..0x1F and 0x7F, except for the
 481                                    following 5 codes.  */
 482     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 483     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 484     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 485     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 486     ISO_control_1,              /* Control codes in the range
 487                                    0x80..0x9F, except for the
 488                                    following 3 codes.  */
 489     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 490     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 491     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 492     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 493     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 494     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 495     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 496   };
 497
 498 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 499     `iso-flags' attribute of an iso2022 coding system.  */
 500
 501 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 502    instead of the correct short-form sequence (e.g. ESC $ A).  */
 503 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 504
 505 /* If set, reset graphic planes and registers at end-of-line to the
 506    initial state.  */
 507 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 508
 509 /* If set, reset graphic planes and registers before any control
 510    characters to the initial state.  */
 511 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 512
 513 /* If set, encode by 7-bit environment.  */
 514 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 515
 516 /* If set, use locking-shift function.  */
 517 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 518
 519 /* If set, use single-shift function.  Overwrite
 520    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 521 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 522
 523 /* If set, use designation escape sequence.  */
 524 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 525
 526 /* If set, produce revision number sequence.  */
 527 #define CODING_ISO_FLAG_REVISION        0x0080
 528
 529 /* If set, produce ISO6429's direction specifying sequence.  */
 530 #define CODING_ISO_FLAG_DIRECTION       0x0100
 531
 532 /* If set, assume designation states are reset at beginning of line on
 533    output.  */
 534 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 535
 536 /* If set, designation sequence should be placed at beginning of line
 537    on output.  */
 538 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 539
 540 /* If set, do not encode unsafe charactes on output.  */
 541 #define CODING_ISO_FLAG_SAFE            0x0800
 542
 543 /* If set, extra latin codes (128..159) are accepted as a valid code
 544    on input.  */
 545 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 546
 547 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 548
 549 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 550
 551 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 552
 553 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 554
 555 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 556
 557 /* A character to be produced on output if encoding of the original
 558    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 559 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 560
 561 /* UTF-8 section */
 562 #define CODING_UTF_8_BOM(coding)        \
 563   ((coding)->spec.utf_8_bom)
 564
 565 /* UTF-16 section */
 566 #define CODING_UTF_16_BOM(coding)       \
 567   ((coding)->spec.utf_16.bom)
 568
 569 #define CODING_UTF_16_ENDIAN(coding)    \
 570   ((coding)->spec.utf_16.endian)
 571
 572 #define CODING_UTF_16_SURROGATE(coding) \
 573   ((coding)->spec.utf_16.surrogate)
 574
 575
 576 /* CCL section */
 577 #define CODING_CCL_DECODER(coding)      \
 578   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 579 #define CODING_CCL_ENCODER(coding)      \
 580   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 581 #define CODING_CCL_VALIDS(coding)                                          \
 582   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 583
 584 /* Index for each coding category in `coding_categories' */
 585
 586 enum coding_category
 587   {
 588     coding_category_iso_7,
 589     coding_category_iso_7_tight,
 590     coding_category_iso_8_1,
 591     coding_category_iso_8_2,
 592     coding_category_iso_7_else,
 593     coding_category_iso_8_else,
 594     coding_category_utf_8_auto,
 595     coding_category_utf_8_nosig,
 596     coding_category_utf_8_sig,
 597     coding_category_utf_16_auto,
 598     coding_category_utf_16_be,
 599     coding_category_utf_16_le,
 600     coding_category_utf_16_be_nosig,
 601     coding_category_utf_16_le_nosig,
 602     coding_category_charset,
 603     coding_category_sjis,
 604     coding_category_big5,
 605     coding_category_ccl,
 606     coding_category_emacs_mule,
 607     /* All above are targets of code detection.  */
 608     coding_category_raw_text,
 609     coding_category_undecided,
 610     coding_category_max
 611   };
 612
 613 /* Definitions of flag bits used in detect_coding_XXXX.  */
 614 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 615 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 616 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 617 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 618 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 619 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 620 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 621 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 622 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 623 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 624 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 625 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 626 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 627 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 628 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 629 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 630 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 631 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 632 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 633 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 634
 635 /* This value is returned if detect_coding_mask () find nothing other
 636    than ASCII characters.  */
 637 #define CATEGORY_MASK_ANY               \
 638   (CATEGORY_MASK_ISO_7                  \
 639    | CATEGORY_MASK_ISO_7_TIGHT          \
 640    | CATEGORY_MASK_ISO_8_1              \
 641    | CATEGORY_MASK_ISO_8_2              \
 642    | CATEGORY_MASK_ISO_7_ELSE           \
 643    | CATEGORY_MASK_ISO_8_ELSE           \
 644    | CATEGORY_MASK_UTF_8_AUTO           \
 645    | CATEGORY_MASK_UTF_8_NOSIG          \
 646    | CATEGORY_MASK_UTF_8_SIG            \
 647    | CATEGORY_MASK_UTF_16_AUTO          \
 648    | CATEGORY_MASK_UTF_16_BE            \
 649    | CATEGORY_MASK_UTF_16_LE            \
 650    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 651    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 652    | CATEGORY_MASK_CHARSET              \
 653    | CATEGORY_MASK_SJIS                 \
 654    | CATEGORY_MASK_BIG5                 \
 655    | CATEGORY_MASK_CCL                  \
 656    | CATEGORY_MASK_EMACS_MULE)
 657
 658
 659 #define CATEGORY_MASK_ISO_7BIT \
 660   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 661
 662 #define CATEGORY_MASK_ISO_8BIT \
 663   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 664
 665 #define CATEGORY_MASK_ISO_ELSE \
 666   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 667
 668 #define CATEGORY_MASK_ISO_ESCAPE        \
 669   (CATEGORY_MASK_ISO_7                  \
 670    | CATEGORY_MASK_ISO_7_TIGHT          \
 671    | CATEGORY_MASK_ISO_7_ELSE           \
 672    | CATEGORY_MASK_ISO_8_ELSE)
 673
 674 #define CATEGORY_MASK_ISO       \
 675   (  CATEGORY_MASK_ISO_7BIT     \
 676      | CATEGORY_MASK_ISO_8BIT   \
 677      | CATEGORY_MASK_ISO_ELSE)
 678
 679 #define CATEGORY_MASK_UTF_16            \
 680   (CATEGORY_MASK_UTF_16_AUTO            \
 681    | CATEGORY_MASK_UTF_16_BE            \
 682    | CATEGORY_MASK_UTF_16_LE            \
 683    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 684    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 685
 686 #define CATEGORY_MASK_UTF_8     \
 687   (CATEGORY_MASK_UTF_8_AUTO     \
 688    | CATEGORY_MASK_UTF_8_NOSIG  \
 689    | CATEGORY_MASK_UTF_8_SIG)
 690
 691 /* List of symbols `coding-category-xxx' ordered by priority.  This
 692    variable is exposed to Emacs Lisp.  */
 693 static Lisp_Object Vcoding_category_list;
 694
 695 /* Table of coding categories (Lisp symbols).  This variable is for
 696    internal use oly.  */
 697 static Lisp_Object Vcoding_category_table;
 698
 699 /* Table of coding-categories ordered by priority.  */
 700 static enum coding_category coding_priorities[coding_category_max];
 701
 702 /* Nth element is a coding context for the coding system bound to the
 703    Nth coding category.  */
 704 static struct coding_system coding_categories[coding_category_max];
 705
 706 /*** Commonly used macros and functions ***/
 707
 708 #ifndef min
 709 #define min(a, b) ((a) < (b) ? (a) : (b))
 710 #endif
 711 #ifndef max
 712 #define max(a, b) ((a) > (b) ? (a) : (b))
 713 #endif
 714
 715 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 716   do {                                                  \
 717     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 718     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 719   } while (0)
 720
 721
 722 /* Safely get one byte from the source text pointed by SRC which ends
 723    at SRC_END, and set C to that byte.  If there are not enough bytes
 724    in the source, it jumps to `no_more_source'.  If multibytep is
 725    nonzero, and a multibyte character is found at SRC, set C to the
 726    negative value of the character code.  The caller should declare
 727    and set these variables appropriately in advance:
 728         src, src_end, multibytep */
 729
 730 #define ONE_MORE_BYTE(c)                                \
 731   do {                                                  \
 732     if (src == src_end)                                 \
 733       {                                                 \
 734         if (src_base < src)                             \
 735           record_conversion_result                      \
 736             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 737         goto no_more_source;                            \
 738       }                                                 \
 739     c = *src++;                                         \
 740     if (multibytep && (c & 0x80))                       \
 741       {                                                 \
 742         if ((c & 0xFE) == 0xC0)                         \
 743           c = ((c & 1) << 6) | *src++;                  \
 744         else                                            \
 745           {                                             \
 746             src--;                                      \
 747             c = - string_char (src, &src, NULL);        \
 748             record_conversion_result                    \
 749               (coding, CODING_RESULT_INVALID_SRC);      \
 750           }                                             \
 751       }                                                 \
 752     consumed_chars++;                                   \
 753   } while (0)
 754
 755 /* Safely get two bytes from the source text pointed by SRC which ends
 756    at SRC_END, and set C1 and C2 to those bytes while skipping the
 757    heading multibyte characters.  If there are not enough bytes in the
 758    source, it jumps to `no_more_source'.  If multibytep is nonzero and
 759    a multibyte character is found for C2, set C2 to the negative value
 760    of the character code.  The caller should declare and set these
 761    variables appropriately in advance:
 762         src, src_end, multibytep
 763    It is intended that this macro is used in detect_coding_utf_16.  */
 764
 765 #define TWO_MORE_BYTES(c1, c2)                          \
 766   do {                                                  \
 767     do {                                                \
 768       if (src == src_end)                               \
 769         goto no_more_source;                            \
 770       c1 = *src++;                                      \
 771       if (multibytep && (c1 & 0x80))                    \
 772         {                                               \
 773           if ((c1 & 0xFE) == 0xC0)                      \
 774             c1 = ((c1 & 1) << 6) | *src++;              \
 775           else                                          \
 776             {                                           \
 777               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 778               c1 = -1;                                  \
 779             }                                           \
 780         }                                               \
 781     } while (c1 < 0);                                   \
 782     if (src == src_end)                                 \
 783       goto no_more_source;                              \
 784     c2 = *src++;                                        \
 785     if (multibytep && (c2 & 0x80))                      \
 786       {                                                 \
 787         if ((c2 & 0xFE) == 0xC0)                        \
 788           c2 = ((c2 & 1) << 6) | *src++;                \
 789         else                                            \
 790           c2 = -1;                                      \
 791       }                                                 \
 792   } while (0)
 793
 794
 795 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 796   do {                                                  \
 797     c = *src++;                                         \
 798     if (multibytep && (c & 0x80))                       \
 799       {                                                 \
 800         if ((c & 0xFE) == 0xC0)                         \
 801           c = ((c & 1) << 6) | *src++;                  \
 802         else                                            \
 803           {                                             \
 804             src--;                                      \
 805             c = - string_char (src, &src, NULL);        \
 806             record_conversion_result                    \
 807               (coding, CODING_RESULT_INVALID_SRC);      \
 808           }                                             \
 809       }                                                 \
 810     consumed_chars++;                                   \
 811   } while (0)
 812
 813
 814 /* Store a byte C in the place pointed by DST and increment DST to the
 815    next free point, and increment PRODUCED_CHARS.  The caller should
 816    assure that C is 0..127, and declare and set the variable `dst'
 817    appropriately in advance.
 818 */
 819
 820
 821 #define EMIT_ONE_ASCII_BYTE(c)  \
 822   do {                          \
 823     produced_chars++;           \
 824     *dst++ = (c);               \
 825   } while (0)
 826
 827
 828 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 829
 830 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 831   do {                                  \
 832     produced_chars += 2;                \
 833     *dst++ = (c1), *dst++ = (c2);       \
 834   } while (0)
 835
 836
 837 /* Store a byte C in the place pointed by DST and increment DST to the
 838    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 839    nonzero, store in an appropriate multibyte from.  The caller should
 840    declare and set the variables `dst' and `multibytep' appropriately
 841    in advance.  */
 842
 843 #define EMIT_ONE_BYTE(c)                \
 844   do {                                  \
 845     produced_chars++;                   \
 846     if (multibytep)                     \
 847       {                                 \
 848         int ch = (c);                   \
 849         if (ch >= 0x80)                 \
 850           ch = BYTE8_TO_CHAR (ch);      \
 851         CHAR_STRING_ADVANCE (ch, dst);  \
 852       }                                 \
 853     else                                \
 854       *dst++ = (c);                     \
 855   } while (0)
 856
 857
 858 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 859
 860 #define EMIT_TWO_BYTES(c1, c2)          \
 861   do {                                  \
 862     produced_chars += 2;                \
 863     if (multibytep)                     \
 864       {                                 \
 865         int ch;                         \
 866                                         \
 867         ch = (c1);                      \
 868         if (ch >= 0x80)                 \
 869           ch = BYTE8_TO_CHAR (ch);      \
 870         CHAR_STRING_ADVANCE (ch, dst);  \
 871         ch = (c2);                      \
 872         if (ch >= 0x80)                 \
 873           ch = BYTE8_TO_CHAR (ch);      \
 874         CHAR_STRING_ADVANCE (ch, dst);  \
 875       }                                 \
 876     else                                \
 877       {                                 \
 878         *dst++ = (c1);                  \
 879         *dst++ = (c2);                  \
 880       }                                 \
 881   } while (0)
 882
 883
 884 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 885   do {                                  \
 886     EMIT_ONE_BYTE (c1);                 \
 887     EMIT_TWO_BYTES (c2, c3);            \
 888   } while (0)
 889
 890
 891 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 892   do {                                          \
 893     EMIT_TWO_BYTES (c1, c2);                    \
 894     EMIT_TWO_BYTES (c3, c4);                    \
 895   } while (0)
 896
 897
 898 /* Prototypes for static functions.  */
 899 static void record_conversion_result P_ ((struct coding_system *coding,
 900                                           enum coding_result_code result));
 901 static int detect_coding_utf_8 P_ ((struct coding_system *,
 902                                     struct coding_detection_info *info));
 903 static void decode_coding_utf_8 P_ ((struct coding_system *));
 904 static int encode_coding_utf_8 P_ ((struct coding_system *));
 905
 906 static int detect_coding_utf_16 P_ ((struct coding_system *,
 907                                      struct coding_detection_info *info));
 908 static void decode_coding_utf_16 P_ ((struct coding_system *));
 909 static int encode_coding_utf_16 P_ ((struct coding_system *));
 910
 911 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 912                                        struct coding_detection_info *info));
 913 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 914 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 915
 916 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 917                                          struct coding_detection_info *info));
 918 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 919 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 920
 921 static int detect_coding_sjis P_ ((struct coding_system *,
 922                                    struct coding_detection_info *info));
 923 static void decode_coding_sjis P_ ((struct coding_system *));
 924 static int encode_coding_sjis P_ ((struct coding_system *));
 925
 926 static int detect_coding_big5 P_ ((struct coding_system *,
 927                                    struct coding_detection_info *info));
 928 static void decode_coding_big5 P_ ((struct coding_system *));
 929 static int encode_coding_big5 P_ ((struct coding_system *));
 930
 931 static int detect_coding_ccl P_ ((struct coding_system *,
 932                                   struct coding_detection_info *info));
 933 static void decode_coding_ccl P_ ((struct coding_system *));
 934 static int encode_coding_ccl P_ ((struct coding_system *));
 935
 936 static void decode_coding_raw_text P_ ((struct coding_system *));
 937 static int encode_coding_raw_text P_ ((struct coding_system *));
 938
 939 static void coding_set_source P_ ((struct coding_system *));
 940 static void coding_set_destination P_ ((struct coding_system *));
 941 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 942 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 943                                             EMACS_INT, EMACS_INT));
 944 static unsigned char *alloc_destination P_ ((struct coding_system *,
 945                                              EMACS_INT, unsigned char *));
 946 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 947 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 948                                                      int *, int *,
 949                                                      unsigned char *));
 950 static int detect_eol P_ ((const unsigned char *,
 951                            EMACS_INT, enum coding_category));
 952 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 953 static void decode_eol P_ ((struct coding_system *));
 954 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 955 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *));
 956 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 957 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 958                                         EMACS_INT));
 959 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 960 static int decode_coding P_ ((struct coding_system *));
 961 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 962                                                       struct coding_system *,
 963                                                       int *, EMACS_INT *));
 964 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 965                                                   struct coding_system *,
 966                                                   int *, EMACS_INT *));
 967 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 968 static int encode_coding P_ ((struct coding_system *));
 969 static Lisp_Object make_conversion_work_buffer P_ ((int));
 970 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 971 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 972 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 973
 974 static void
 975 record_conversion_result (struct coding_system *coding,
 976                           enum coding_result_code result)
 977 {
 978   coding->result = result;
 979   switch (result)
 980     {
 981     case CODING_RESULT_INSUFFICIENT_SRC:
 982       Vlast_code_conversion_error = Qinsufficient_source;
 983       break;
 984     case CODING_RESULT_INCONSISTENT_EOL:
 985       Vlast_code_conversion_error = Qinconsistent_eol;
 986       break;
 987     case CODING_RESULT_INVALID_SRC:
 988       Vlast_code_conversion_error = Qinvalid_source;
 989       break;
 990     case CODING_RESULT_INTERRUPT:
 991       Vlast_code_conversion_error = Qinterrupted;
 992       break;
 993     case CODING_RESULT_INSUFFICIENT_MEM:
 994       Vlast_code_conversion_error = Qinsufficient_memory;
 995       break;
 996     case CODING_RESULT_SUCCESS:
 997       break;
 998     default:
 999       Vlast_code_conversion_error = intern ("Unknown error");
1000     }
1001 }
1002
1003 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
1004   do {                                                                       \
1005     charset_map_loaded = 0;                                                  \
1006     c = DECODE_CHAR (charset, code);                                         \
1007     if (charset_map_loaded)                                                  \
1008       {                                                                      \
1009         const unsigned char *orig = coding->source;                          \
1010         EMACS_INT offset;                                                    \
1011                                                                              \
1012         coding_set_source (coding);                                          \
1013         offset = coding->source - orig;                                      \
1014         src += offset;                                                       \
1015         src_base += offset;                                                  \
1016         src_end += offset;                                                   \
1017       }                                                                      \
1018   } while (0)
1019
1020
1021 /* If there are at least BYTES length of room at dst, allocate memory
1022    for coding->destination and update dst and dst_end.  We don't have
1023    to take care of coding->source which will be relocated.  It is
1024    handled by calling coding_set_source in encode_coding.  */
1025
1026 #define ASSURE_DESTINATION(bytes)                               \
1027   do {                                                          \
1028     if (dst + (bytes) >= dst_end)                               \
1029       {                                                         \
1030         int more_bytes = charbuf_end - charbuf + (bytes);       \
1031                                                                 \
1032         dst = alloc_destination (coding, more_bytes, dst);      \
1033         dst_end = coding->destination + coding->dst_bytes;      \
1034       }                                                         \
1035   } while (0)
1036
1037
1038 /* Store multibyte form of the character C in P, and advance P to the
1039    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
1040    never calls MAYBE_UNIFY_CHAR.  */
1041
1042 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
1043   do {                                          \
1044     if ((c) <= MAX_1_BYTE_CHAR)                 \
1045       *(p)++ = (c);                             \
1046     else if ((c) <= MAX_2_BYTE_CHAR)            \
1047       *(p)++ = (0xC0 | ((c) >> 6)),             \
1048         *(p)++ = (0x80 | ((c) & 0x3F));         \
1049     else if ((c) <= MAX_3_BYTE_CHAR)            \
1050       *(p)++ = (0xE0 | ((c) >> 12)),            \
1051         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1052         *(p)++ = (0x80 | ((c) & 0x3F));         \
1053     else if ((c) <= MAX_4_BYTE_CHAR)            \
1054       *(p)++ = (0xF0 | (c >> 18)),              \
1055         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1056         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1057         *(p)++ = (0x80 | (c & 0x3F));           \
1058     else if ((c) <= MAX_5_BYTE_CHAR)            \
1059       *(p)++ = 0xF8,                            \
1060         *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
1061         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1062         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1063         *(p)++ = (0x80 | (c & 0x3F));           \
1064     else                                        \
1065       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1066   } while (0)
1067
1068
1069 /* Return the character code of character whose multibyte form is at
1070    P, and advance P to the end of the multibyte form.  This is like
1071    STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.  */
1072
1073 #define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
1074   (!((p)[0] & 0x80)                                             \
1075    ? *(p)++                                                     \
1076    : ! ((p)[0] & 0x20)                                          \
1077    ? ((p) += 2,                                                 \
1078       ((((p)[-2] & 0x1F) << 6)                                  \
1079        | ((p)[-1] & 0x3F)                                       \
1080        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
1081    : ! ((p)[0] & 0x10)                                          \
1082    ? ((p) += 3,                                                 \
1083       ((((p)[-3] & 0x0F) << 12)                                 \
1084        | (((p)[-2] & 0x3F) << 6)                                \
1085        | ((p)[-1] & 0x3F)))                                     \
1086    : ! ((p)[0] & 0x08)                                          \
1087    ? ((p) += 4,                                                 \
1088       ((((p)[-4] & 0xF) << 18)                                  \
1089        | (((p)[-3] & 0x3F) << 12)                               \
1090        | (((p)[-2] & 0x3F) << 6)                                \
1091        | ((p)[-1] & 0x3F)))                                     \
1092    : ((p) += 5,                                                 \
1093       ((((p)[-4] & 0x3F) << 18)                                 \
1094        | (((p)[-3] & 0x3F) << 12)                               \
1095        | (((p)[-2] & 0x3F) << 6)                                \
1096        | ((p)[-1] & 0x3F))))
1097
1098
1099 static void
1100 coding_set_source (coding)
1101      struct coding_system *coding;
1102 {
1103   if (BUFFERP (coding->src_object))
1104     {
1105       struct buffer *buf = XBUFFER (coding->src_object);
1106
1107       if (coding->src_pos < 0)
1108         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1109       else
1110         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1111     }
1112   else if (STRINGP (coding->src_object))
1113     {
1114       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1115     }
1116   else
1117     /* Otherwise, the source is C string and is never relocated
1118        automatically.  Thus we don't have to update anything.  */
1119     ;
1120 }
1121
1122 static void
1123 coding_set_destination (coding)
1124      struct coding_system *coding;
1125 {
1126   if (BUFFERP (coding->dst_object))
1127     {
1128       if (coding->src_pos < 0)
1129         {
1130           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1131           coding->dst_bytes = (GAP_END_ADDR
1132                                - (coding->src_bytes - coding->consumed)
1133                                - coding->destination);
1134         }
1135       else
1136         {
1137           /* We are sure that coding->dst_pos_byte is before the gap
1138              of the buffer. */
1139           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1140                                  + coding->dst_pos_byte - BEG_BYTE);
1141           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1142                                - coding->destination);
1143         }
1144     }
1145   else
1146     /* Otherwise, the destination is C string and is never relocated
1147        automatically.  Thus we don't have to update anything.  */
1148     ;
1149 }
1150
1151
1152 static void
1153 coding_alloc_by_realloc (coding, bytes)
1154      struct coding_system *coding;
1155      EMACS_INT bytes;
1156 {
1157   coding->destination = (unsigned char *) xrealloc (coding->destination,
1158                                                     coding->dst_bytes + bytes);
1159   coding->dst_bytes += bytes;
1160 }
1161
1162 static void
1163 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1164      struct coding_system *coding;
1165      EMACS_INT gap_head_used, bytes;
1166 {
1167   if (EQ (coding->src_object, coding->dst_object))
1168     {
1169       /* The gap may contain the produced data at the head and not-yet
1170          consumed data at the tail.  To preserve those data, we at
1171          first make the gap size to zero, then increase the gap
1172          size.  */
1173       EMACS_INT add = GAP_SIZE;
1174
1175       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1176       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1177       make_gap (bytes);
1178       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1179       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1180     }
1181   else
1182     {
1183       Lisp_Object this_buffer;
1184
1185       this_buffer = Fcurrent_buffer ();
1186       set_buffer_internal (XBUFFER (coding->dst_object));
1187       make_gap (bytes);
1188       set_buffer_internal (XBUFFER (this_buffer));
1189     }
1190 }
1191
1192
1193 static unsigned char *
1194 alloc_destination (coding, nbytes, dst)
1195      struct coding_system *coding;
1196      EMACS_INT nbytes;
1197      unsigned char *dst;
1198 {
1199   EMACS_INT offset = dst - coding->destination;
1200
1201   if (BUFFERP (coding->dst_object))
1202     {
1203       struct buffer *buf = XBUFFER (coding->dst_object);
1204
1205       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1206     }
1207   else
1208     coding_alloc_by_realloc (coding, nbytes);
1209   coding_set_destination (coding);
1210   dst = coding->destination + offset;
1211   return dst;
1212 }
1213
1214 /** Macros for annotations.  */
1215
1216 /* An annotation data is stored in the array coding->charbuf in this
1217    format:
1218      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1219    LENGTH is the number of elements in the annotation.
1220    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1221    NCHARS is the number of characters in the text annotated.
1222
1223    The format of the following elements depend on ANNOTATION_MASK.
1224
1225    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1226    follows:
1227      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1228
1229    NBYTES is the number of bytes specified in the header part of
1230    old-style emacs-mule encoding, or 0 for the other kind of
1231    composition.
1232
1233    METHOD is one of enum composition_method.
1234
1235    Optionnal COMPOSITION-COMPONENTS are characters and composition
1236    rules.
1237
1238    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1239    follows.
1240
1241    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1242    recover from an invalid annotation, and should be skipped by
1243    produce_annotation.  */
1244
1245 /* Maximum length of the header of annotation data.  */
1246 #define MAX_ANNOTATION_LENGTH 5
1247
1248 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1249   do {                                                  \
1250     *(buf)++ = -(len);                                  \
1251     *(buf)++ = (mask);                                  \
1252     *(buf)++ = (nchars);                                \
1253     coding->annotated = 1;                              \
1254   } while (0);
1255
1256 #define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
1257   do {                                                                      \
1258     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1259     *buf++ = nbytes;                                                        \
1260     *buf++ = method;                                                        \
1261   } while (0)
1262
1263
1264 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1265   do {                                                                  \
1266     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1267     *buf++ = id;                                                        \
1268   } while (0)
1269
1270 \f
1271 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1272
1273
1274
1275 \f
1276 /*** 3. UTF-8 ***/
1277
1278 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1279    Check if a text is encoded in UTF-8.  If it is, return 1, else
1280    return 0.  */
1281
1282 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1283 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1284 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1285 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1286 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1287 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1288
1289 #define UTF_BOM 0xFEFF
1290 #define UTF_8_BOM_1 0xEF
1291 #define UTF_8_BOM_2 0xBB
1292 #define UTF_8_BOM_3 0xBF
1293
1294 static int
1295 detect_coding_utf_8 (coding, detect_info)
1296      struct coding_system *coding;
1297      struct coding_detection_info *detect_info;
1298 {
1299   const unsigned char *src = coding->source, *src_base;
1300   const unsigned char *src_end = coding->source + coding->src_bytes;
1301   int multibytep = coding->src_multibyte;
1302   int consumed_chars = 0;
1303   int bom_found = 0;
1304   int found = 0;
1305
1306   detect_info->checked |= CATEGORY_MASK_UTF_8;
1307   /* A coding system of this category is always ASCII compatible.  */
1308   src += coding->head_ascii;
1309
1310   while (1)
1311     {
1312       int c, c1, c2, c3, c4;
1313
1314       src_base = src;
1315       ONE_MORE_BYTE (c);
1316       if (c < 0 || UTF_8_1_OCTET_P (c))
1317         continue;
1318       ONE_MORE_BYTE (c1);
1319       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1320         break;
1321       if (UTF_8_2_OCTET_LEADING_P (c))
1322         {
1323           found = 1;
1324           continue;
1325         }
1326       ONE_MORE_BYTE (c2);
1327       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1328         break;
1329       if (UTF_8_3_OCTET_LEADING_P (c))
1330         {
1331           found = 1;
1332           if (src_base == coding->source
1333               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1334             bom_found = 1;
1335           continue;
1336         }
1337       ONE_MORE_BYTE (c3);
1338       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1339         break;
1340       if (UTF_8_4_OCTET_LEADING_P (c))
1341         {
1342           found = 1;
1343           continue;
1344         }
1345       ONE_MORE_BYTE (c4);
1346       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1347         break;
1348       if (UTF_8_5_OCTET_LEADING_P (c))
1349         {
1350           found = 1;
1351           continue;
1352         }
1353       break;
1354     }
1355   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1356   return 0;
1357
1358  no_more_source:
1359   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1360     {
1361       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1362       return 0;
1363     }
1364   if (bom_found)
1365     {
1366       /* The first character 0xFFFE doesn't necessarily mean a BOM.  */
1367       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1368     }
1369   else
1370     {
1371       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1372       if (found)
1373         detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1374     }
1375   return 1;
1376 }
1377
1378
1379 static void
1380 decode_coding_utf_8 (coding)
1381      struct coding_system *coding;
1382 {
1383   const unsigned char *src = coding->source + coding->consumed;
1384   const unsigned char *src_end = coding->source + coding->src_bytes;
1385   const unsigned char *src_base;
1386   int *charbuf = coding->charbuf + coding->charbuf_used;
1387   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1388   int consumed_chars = 0, consumed_chars_base = 0;
1389   int multibytep = coding->src_multibyte;
1390   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1391   Lisp_Object attr, charset_list;
1392   int eol_crlf =
1393     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1394   int byte_after_cr = -1;
1395
1396   CODING_GET_INFO (coding, attr, charset_list);
1397
1398   if (bom != utf_without_bom)
1399     {
1400       int c1, c2, c3;
1401
1402       src_base = src;
1403       ONE_MORE_BYTE (c1);
1404       if (! UTF_8_3_OCTET_LEADING_P (c1))
1405         src = src_base;
1406       else
1407         {
1408           ONE_MORE_BYTE (c2);
1409           if (! UTF_8_EXTRA_OCTET_P (c2))
1410             src = src_base;
1411           else
1412             {
1413               ONE_MORE_BYTE (c3);
1414               if (! UTF_8_EXTRA_OCTET_P (c3))
1415                 src = src_base;
1416               else
1417                 {
1418                   if ((c1 != UTF_8_BOM_1)
1419                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1420                     src = src_base;
1421                   else
1422                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1423                 }
1424             }
1425         }
1426     }
1427   CODING_UTF_8_BOM (coding) = utf_without_bom;
1428
1429
1430
1431   while (1)
1432     {
1433       int c, c1, c2, c3, c4, c5;
1434
1435       src_base = src;
1436       consumed_chars_base = consumed_chars;
1437
1438       if (charbuf >= charbuf_end)
1439         {
1440           if (byte_after_cr >= 0)
1441             src_base--;
1442           break;
1443         }
1444
1445       if (byte_after_cr >= 0)
1446         c1 = byte_after_cr, byte_after_cr = -1;
1447       else
1448         ONE_MORE_BYTE (c1);
1449       if (c1 < 0)
1450         {
1451           c = - c1;
1452         }
1453       else if (UTF_8_1_OCTET_P(c1))
1454         {
1455           if (eol_crlf && c1 == '\r')
1456             ONE_MORE_BYTE (byte_after_cr);
1457           c = c1;
1458         }
1459       else
1460         {
1461           ONE_MORE_BYTE (c2);
1462           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1463             goto invalid_code;
1464           if (UTF_8_2_OCTET_LEADING_P (c1))
1465             {
1466               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1467               /* Reject overlong sequences here and below.  Encoders
1468                  producing them are incorrect, they can be misleading,
1469                  and they mess up read/write invariance.  */
1470               if (c < 128)
1471                 goto invalid_code;
1472             }
1473           else
1474             {
1475               ONE_MORE_BYTE (c3);
1476               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1477                 goto invalid_code;
1478               if (UTF_8_3_OCTET_LEADING_P (c1))
1479                 {
1480                   c = (((c1 & 0xF) << 12)
1481                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1482                   if (c < 0x800
1483                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1484                     goto invalid_code;
1485                 }
1486               else
1487                 {
1488                   ONE_MORE_BYTE (c4);
1489                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1490                     goto invalid_code;
1491                   if (UTF_8_4_OCTET_LEADING_P (c1))
1492                     {
1493                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1494                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1495                     if (c < 0x10000)
1496                       goto invalid_code;
1497                     }
1498                   else
1499                     {
1500                       ONE_MORE_BYTE (c5);
1501                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1502                         goto invalid_code;
1503                       if (UTF_8_5_OCTET_LEADING_P (c1))
1504                         {
1505                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1506                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1507                                | (c5 & 0x3F));
1508                           if ((c > MAX_CHAR) || (c < 0x200000))
1509                             goto invalid_code;
1510                         }
1511                       else
1512                         goto invalid_code;
1513                     }
1514                 }
1515             }
1516         }
1517
1518       *charbuf++ = c;
1519       continue;
1520
1521     invalid_code:
1522       src = src_base;
1523       consumed_chars = consumed_chars_base;
1524       ONE_MORE_BYTE (c);
1525       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1526       coding->errors++;
1527     }
1528
1529  no_more_source:
1530   coding->consumed_char += consumed_chars_base;
1531   coding->consumed = src_base - coding->source;
1532   coding->charbuf_used = charbuf - coding->charbuf;
1533 }
1534
1535
1536 static int
1537 encode_coding_utf_8 (coding)
1538      struct coding_system *coding;
1539 {
1540   int multibytep = coding->dst_multibyte;
1541   int *charbuf = coding->charbuf;
1542   int *charbuf_end = charbuf + coding->charbuf_used;
1543   unsigned char *dst = coding->destination + coding->produced;
1544   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1545   int produced_chars = 0;
1546   int c;
1547
1548   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1549     {
1550       ASSURE_DESTINATION (3);
1551       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1552       CODING_UTF_8_BOM (coding) = utf_without_bom;
1553     }
1554
1555   if (multibytep)
1556     {
1557       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1558
1559       while (charbuf < charbuf_end)
1560         {
1561           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1562
1563           ASSURE_DESTINATION (safe_room);
1564           c = *charbuf++;
1565           if (CHAR_BYTE8_P (c))
1566             {
1567               c = CHAR_TO_BYTE8 (c);
1568               EMIT_ONE_BYTE (c);
1569             }
1570           else
1571             {
1572               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1573               for (p = str; p < pend; p++)
1574                 EMIT_ONE_BYTE (*p);
1575             }
1576         }
1577     }
1578   else
1579     {
1580       int safe_room = MAX_MULTIBYTE_LENGTH;
1581
1582       while (charbuf < charbuf_end)
1583         {
1584           ASSURE_DESTINATION (safe_room);
1585           c = *charbuf++;
1586           if (CHAR_BYTE8_P (c))
1587             *dst++ = CHAR_TO_BYTE8 (c);
1588           else
1589             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1590           produced_chars++;
1591         }
1592     }
1593   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1594   coding->produced_char += produced_chars;
1595   coding->produced = dst - coding->destination;
1596   return 0;
1597 }
1598
1599
1600 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1601    Check if a text is encoded in one of UTF-16 based coding systems.
1602    If it is, return 1, else return 0.  */
1603
1604 #define UTF_16_HIGH_SURROGATE_P(val) \
1605   (((val) & 0xFC00) == 0xD800)
1606
1607 #define UTF_16_LOW_SURROGATE_P(val) \
1608   (((val) & 0xFC00) == 0xDC00)
1609
1610 #define UTF_16_INVALID_P(val)   \
1611   (((val) == 0xFFFE)            \
1612    || ((val) == 0xFFFF)         \
1613    || UTF_16_LOW_SURROGATE_P (val))
1614
1615
1616 static int
1617 detect_coding_utf_16 (coding, detect_info)
1618      struct coding_system *coding;
1619      struct coding_detection_info *detect_info;
1620 {
1621   const unsigned char *src = coding->source, *src_base = src;
1622   const unsigned char *src_end = coding->source + coding->src_bytes;
1623   int multibytep = coding->src_multibyte;
1624   int consumed_chars = 0;
1625   int c1, c2;
1626
1627   detect_info->checked |= CATEGORY_MASK_UTF_16;
1628   if (coding->mode & CODING_MODE_LAST_BLOCK
1629       && (coding->src_chars & 1))
1630     {
1631       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1632       return 0;
1633     }
1634
1635   TWO_MORE_BYTES (c1, c2);
1636   if ((c1 == 0xFF) && (c2 == 0xFE))
1637     {
1638       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1639                              | CATEGORY_MASK_UTF_16_AUTO);
1640       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1641                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1642                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1643     }
1644   else if ((c1 == 0xFE) && (c2 == 0xFF))
1645     {
1646       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1647                              | CATEGORY_MASK_UTF_16_AUTO);
1648       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1649                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1650                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1651     }
1652   else if (c2 < 0)
1653     {
1654       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1655       return 0;
1656     }
1657   else
1658     {
1659       /* We check the dispersion of Eth and Oth bytes where E is even and
1660          O is odd.  If both are high, we assume binary data.*/
1661       unsigned char e[256], o[256];
1662       unsigned e_num = 1, o_num = 1;
1663
1664       memset (e, 0, 256);
1665       memset (o, 0, 256);
1666       e[c1] = 1;
1667       o[c2] = 1;
1668
1669       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1670                                 |CATEGORY_MASK_UTF_16_BE
1671                                 | CATEGORY_MASK_UTF_16_LE);
1672
1673       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1674              != CATEGORY_MASK_UTF_16)
1675         {
1676           TWO_MORE_BYTES (c1, c2);
1677           if (c2 < 0)
1678             break;
1679           if (! e[c1])
1680             {
1681               e[c1] = 1;
1682               e_num++;
1683               if (e_num >= 128)
1684                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1685             }
1686           if (! o[c2])
1687             {
1688               o[c2] = 1;
1689               o_num++;
1690               if (o_num >= 128)
1691                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1692             }
1693         }
1694       return 0;
1695     }
1696
1697  no_more_source:
1698   return 1;
1699 }
1700
1701 static void
1702 decode_coding_utf_16 (coding)
1703      struct coding_system *coding;
1704 {
1705   const unsigned char *src = coding->source + coding->consumed;
1706   const unsigned char *src_end = coding->source + coding->src_bytes;
1707   const unsigned char *src_base;
1708   int *charbuf = coding->charbuf + coding->charbuf_used;
1709   /* We may produces at most 3 chars in one loop.  */
1710   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1711   int consumed_chars = 0, consumed_chars_base = 0;
1712   int multibytep = coding->src_multibyte;
1713   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1714   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1715   int surrogate = CODING_UTF_16_SURROGATE (coding);
1716   Lisp_Object attr, charset_list;
1717   int eol_crlf =
1718     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1719   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1720
1721   CODING_GET_INFO (coding, attr, charset_list);
1722
1723   if (bom == utf_with_bom)
1724     {
1725       int c, c1, c2;
1726
1727       src_base = src;
1728       ONE_MORE_BYTE (c1);
1729       ONE_MORE_BYTE (c2);
1730       c = (c1 << 8) | c2;
1731
1732       if (endian == utf_16_big_endian
1733           ? c != 0xFEFF : c != 0xFFFE)
1734         {
1735           /* The first two bytes are not BOM.  Treat them as bytes
1736              for a normal character.  */
1737           src = src_base;
1738           coding->errors++;
1739         }
1740       CODING_UTF_16_BOM (coding) = utf_without_bom;
1741     }
1742   else if (bom == utf_detect_bom)
1743     {
1744       /* We have already tried to detect BOM and failed in
1745          detect_coding.  */
1746       CODING_UTF_16_BOM (coding) = utf_without_bom;
1747     }
1748
1749   while (1)
1750     {
1751       int c, c1, c2;
1752
1753       src_base = src;
1754       consumed_chars_base = consumed_chars;
1755
1756       if (charbuf >= charbuf_end)
1757         {
1758           if (byte_after_cr1 >= 0)
1759             src_base -= 2;
1760           break;
1761         }
1762
1763       if (byte_after_cr1 >= 0)
1764         c1 = byte_after_cr1, byte_after_cr1 = -1;
1765       else
1766         ONE_MORE_BYTE (c1);
1767       if (c1 < 0)
1768         {
1769           *charbuf++ = -c1;
1770           continue;
1771         }
1772       if (byte_after_cr2 >= 0)
1773         c2 = byte_after_cr2, byte_after_cr2 = -1;
1774       else
1775         ONE_MORE_BYTE (c2);
1776       if (c2 < 0)
1777         {
1778           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1779           *charbuf++ = -c2;
1780           continue;
1781         }
1782       c = (endian == utf_16_big_endian
1783            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1784
1785       if (surrogate)
1786         {
1787           if (! UTF_16_LOW_SURROGATE_P (c))
1788             {
1789               if (endian == utf_16_big_endian)
1790                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1791               else
1792                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1793               *charbuf++ = c1;
1794               *charbuf++ = c2;
1795               coding->errors++;
1796               if (UTF_16_HIGH_SURROGATE_P (c))
1797                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1798               else
1799                 *charbuf++ = c;
1800             }
1801           else
1802             {
1803               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1804               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1805               *charbuf++ = 0x10000 + c;
1806             }
1807         }
1808       else
1809         {
1810           if (UTF_16_HIGH_SURROGATE_P (c))
1811             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1812           else
1813             {
1814               if (eol_crlf && c == '\r')
1815                 {
1816                   ONE_MORE_BYTE (byte_after_cr1);
1817                   ONE_MORE_BYTE (byte_after_cr2);
1818                 }
1819               *charbuf++ = c;
1820             }
1821         }
1822     }
1823
1824  no_more_source:
1825   coding->consumed_char += consumed_chars_base;
1826   coding->consumed = src_base - coding->source;
1827   coding->charbuf_used = charbuf - coding->charbuf;
1828 }
1829
1830 static int
1831 encode_coding_utf_16 (coding)
1832      struct coding_system *coding;
1833 {
1834   int multibytep = coding->dst_multibyte;
1835   int *charbuf = coding->charbuf;
1836   int *charbuf_end = charbuf + coding->charbuf_used;
1837   unsigned char *dst = coding->destination + coding->produced;
1838   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1839   int safe_room = 8;
1840   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1841   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1842   int produced_chars = 0;
1843   Lisp_Object attrs, charset_list;
1844   int c;
1845
1846   CODING_GET_INFO (coding, attrs, charset_list);
1847
1848   if (bom != utf_without_bom)
1849     {
1850       ASSURE_DESTINATION (safe_room);
1851       if (big_endian)
1852         EMIT_TWO_BYTES (0xFE, 0xFF);
1853       else
1854         EMIT_TWO_BYTES (0xFF, 0xFE);
1855       CODING_UTF_16_BOM (coding) = utf_without_bom;
1856     }
1857
1858   while (charbuf < charbuf_end)
1859     {
1860       ASSURE_DESTINATION (safe_room);
1861       c = *charbuf++;
1862       if (c > MAX_UNICODE_CHAR)
1863         c = coding->default_char;
1864
1865       if (c < 0x10000)
1866         {
1867           if (big_endian)
1868             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1869           else
1870             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1871         }
1872       else
1873         {
1874           int c1, c2;
1875
1876           c -= 0x10000;
1877           c1 = (c >> 10) + 0xD800;
1878           c2 = (c & 0x3FF) + 0xDC00;
1879           if (big_endian)
1880             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1881           else
1882             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1883         }
1884     }
1885   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1886   coding->produced = dst - coding->destination;
1887   coding->produced_char += produced_chars;
1888   return 0;
1889 }
1890
1891 \f
1892 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1893
1894 /* Emacs' internal format for representation of multiple character
1895    sets is a kind of multi-byte encoding, i.e. characters are
1896    represented by variable-length sequences of one-byte codes.
1897
1898    ASCII characters and control characters (e.g. `tab', `newline') are
1899    represented by one-byte sequences which are their ASCII codes, in
1900    the range 0x00 through 0x7F.
1901
1902    8-bit characters of the range 0x80..0x9F are represented by
1903    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1904    code + 0x20).
1905
1906    8-bit characters of the range 0xA0..0xFF are represented by
1907    one-byte sequences which are their 8-bit code.
1908
1909    The other characters are represented by a sequence of `base
1910    leading-code', optional `extended leading-code', and one or two
1911    `position-code's.  The length of the sequence is determined by the
1912    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1913    whereas extended leading-code and position-code take the range 0xA0
1914    through 0xFF.  See `charset.h' for more details about leading-code
1915    and position-code.
1916
1917    --- CODE RANGE of Emacs' internal format ---
1918    character set        range
1919    -------------        -----
1920    ascii                0x00..0x7F
1921    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1922    eight-bit-graphic    0xA0..0xBF
1923    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1924    ---------------------------------------------
1925
1926    As this is the internal character representation, the format is
1927    usually not used externally (i.e. in a file or in a data sent to a
1928    process).  But, it is possible to have a text externally in this
1929    format (i.e. by encoding by the coding system `emacs-mule').
1930
1931    In that case, a sequence of one-byte codes has a slightly different
1932    form.
1933
1934    At first, all characters in eight-bit-control are represented by
1935    one-byte sequences which are their 8-bit code.
1936
1937    Next, character composition data are represented by the byte
1938    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1939    where,
1940         METHOD is 0xF2 plus one of composition method (enum
1941         composition_method),
1942
1943         BYTES is 0xA0 plus a byte length of this composition data,
1944
1945         CHARS is 0xA0 plus a number of characters composed by this
1946         data,
1947
1948         COMPONENTs are characters of multibye form or composition
1949         rules encoded by two-byte of ASCII codes.
1950
1951    In addition, for backward compatibility, the following formats are
1952    also recognized as composition data on decoding.
1953
1954    0x80 MSEQ ...
1955    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1956
1957    Here,
1958         MSEQ is a multibyte form but in these special format:
1959           ASCII: 0xA0 ASCII_CODE+0x80,
1960           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1961         RULE is a one byte code of the range 0xA0..0xF0 that
1962         represents a composition rule.
1963   */
1964
1965 char emacs_mule_bytes[256];
1966
1967
1968 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1969    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1970    else return 0.  */
1971
1972 static int
1973 detect_coding_emacs_mule (coding, detect_info)
1974      struct coding_system *coding;
1975      struct coding_detection_info *detect_info;
1976 {
1977   const unsigned char *src = coding->source, *src_base;
1978   const unsigned char *src_end = coding->source + coding->src_bytes;
1979   int multibytep = coding->src_multibyte;
1980   int consumed_chars = 0;
1981   int c;
1982   int found = 0;
1983
1984   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1985   /* A coding system of this category is always ASCII compatible.  */
1986   src += coding->head_ascii;
1987
1988   while (1)
1989     {
1990       src_base = src;
1991       ONE_MORE_BYTE (c);
1992       if (c < 0)
1993         continue;
1994       if (c == 0x80)
1995         {
1996           /* Perhaps the start of composite character.  We simply skip
1997              it because analyzing it is too heavy for detecting.  But,
1998              at least, we check that the composite character
1999              constitutes of more than 4 bytes.  */
2000           const unsigned char *src_base;
2001
2002         repeat:
2003           src_base = src;
2004           do
2005             {
2006               ONE_MORE_BYTE (c);
2007             }
2008           while (c >= 0xA0);
2009
2010           if (src - src_base <= 4)
2011             break;
2012           found = CATEGORY_MASK_EMACS_MULE;
2013           if (c == 0x80)
2014             goto repeat;
2015         }
2016
2017       if (c < 0x80)
2018         {
2019           if (c < 0x20
2020               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2021             break;
2022         }
2023       else
2024         {
2025           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2026
2027           while (more_bytes > 0)
2028             {
2029               ONE_MORE_BYTE (c);
2030               if (c < 0xA0)
2031                 {
2032                   src--;        /* Unread the last byte.  */
2033                   break;
2034                 }
2035               more_bytes--;
2036             }
2037           if (more_bytes != 0)
2038             break;
2039           found = CATEGORY_MASK_EMACS_MULE;
2040         }
2041     }
2042   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2043   return 0;
2044
2045  no_more_source:
2046   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2047     {
2048       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2049       return 0;
2050     }
2051   detect_info->found |= found;
2052   return 1;
2053 }
2054
2055
2056 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2057    character.  If CMP_STATUS indicates that we must expect MSEQ or
2058    RULE described above, decode it and return the negative value of
2059    the deocded character or rule.  If an invalid byte is found, return
2060    -1.  If SRC is too short, return -2.  */
2061
2062 int
2063 emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
2064      struct coding_system *coding;
2065      const unsigned char *src;
2066      int *nbytes, *nchars, *id;
2067      struct composition_status *cmp_status;
2068 {
2069   const unsigned char *src_end = coding->source + coding->src_bytes;
2070   const unsigned char *src_base = src;
2071   int multibytep = coding->src_multibyte;
2072   struct charset *charset;
2073   unsigned code;
2074   int c;
2075   int consumed_chars = 0;
2076   int mseq_found = 0;
2077
2078   ONE_MORE_BYTE (c);
2079   if (c < 0)
2080     {
2081       c = -c;
2082       charset = emacs_mule_charset[0];
2083     }
2084   else
2085     {
2086       if (c >= 0xA0)
2087         {
2088           if (cmp_status->state != COMPOSING_NO
2089               && cmp_status->old_form)
2090             {
2091               if (cmp_status->state == COMPOSING_CHAR)
2092                 {
2093                   if (c == 0xA0)
2094                     {
2095                       ONE_MORE_BYTE (c);
2096                       c -= 0x80;
2097                       if (c < 0)
2098                         goto invalid_code;
2099                     }
2100                   else
2101                     c -= 0x20;
2102                   mseq_found = 1;
2103                 }
2104               else
2105                 {
2106                   *nbytes = src - src_base;
2107                   *nchars = consumed_chars;
2108                   return -c;
2109                 }
2110             }
2111           else
2112             goto invalid_code;
2113         }
2114
2115       switch (emacs_mule_bytes[c])
2116         {
2117         case 2:
2118           if (! (charset = emacs_mule_charset[c]))
2119             goto invalid_code;
2120           ONE_MORE_BYTE (c);
2121           if (c < 0xA0)
2122             goto invalid_code;
2123           code = c & 0x7F;
2124           break;
2125
2126         case 3:
2127           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2128               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2129             {
2130               ONE_MORE_BYTE (c);
2131               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
2132                 goto invalid_code;
2133               ONE_MORE_BYTE (c);
2134               if (c < 0xA0)
2135                 goto invalid_code;
2136               code = c & 0x7F;
2137             }
2138           else
2139             {
2140               if (! (charset = emacs_mule_charset[c]))
2141                 goto invalid_code;
2142               ONE_MORE_BYTE (c);
2143               if (c < 0xA0)
2144                 goto invalid_code;
2145               code = (c & 0x7F) << 8;
2146               ONE_MORE_BYTE (c);
2147               if (c < 0xA0)
2148                 goto invalid_code;
2149               code |= c & 0x7F;
2150             }
2151           break;
2152
2153         case 4:
2154           ONE_MORE_BYTE (c);
2155           if (c < 0 || ! (charset = emacs_mule_charset[c]))
2156             goto invalid_code;
2157           ONE_MORE_BYTE (c);
2158           if (c < 0xA0)
2159             goto invalid_code;
2160           code = (c & 0x7F) << 8;
2161           ONE_MORE_BYTE (c);
2162           if (c < 0xA0)
2163             goto invalid_code;
2164           code |= c & 0x7F;
2165           break;
2166
2167         case 1:
2168           code = c;
2169           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2170                                      ? charset_ascii : charset_eight_bit);
2171           break;
2172
2173         default:
2174           abort ();
2175         }
2176       c = DECODE_CHAR (charset, code);
2177       if (c < 0)
2178         goto invalid_code;
2179     }
2180   *nbytes = src - src_base;
2181   *nchars = consumed_chars;
2182   if (id)
2183     *id = charset->id;
2184   return (mseq_found ? -c : c);
2185
2186  no_more_source:
2187   return -2;
2188
2189  invalid_code:
2190   return -1;
2191 }
2192
2193
2194 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2195
2196 /* Handle these composition sequence ('|': the end of header elements,
2197    BYTES and CHARS >= 0xA0):
2198
2199    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2200    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2201    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2202
2203    and these old form:
2204
2205    (4) relative composition: 0x80 | MSEQ ... MSEQ
2206    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2207
2208    When the starter 0x80 and the following header elements are found,
2209    this annotation header is produced.
2210
2211         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2212
2213    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2214    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2215
2216    Then, upon reading the following elements, these codes are produced
2217    until the composition end is found:
2218
2219    (1) CHAR ... CHAR
2220    (2) ALT ... ALT CHAR ... CHAR
2221    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2222    (4) CHAR ... CHAR
2223    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2224
2225    When the composition end is found, LENGTH and NCHARS in the
2226    annotation header is updated as below:
2227
2228    (1) LENGTH: unchanged, NCHARS: unchanged
2229    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2230    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2231    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2232    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2233
2234    If an error is found while composing, the annotation header is
2235    changed to the original composition header (plus filler -1s) as
2236    below:
2237
2238    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2239    (5)          [ 0x80 0xFF -1 -1- -1 ]
2240
2241    and the sequence [ -2 DECODED-RULE ] is changed to the original
2242    byte sequence as below:
2243         o the original byte sequence is B: [ B -1 ]
2244         o the original byte sequence is B1 B2: [ B1 B2 ]
2245
2246    Most of the routines are implemented by macros because many
2247    variables and labels in the caller decode_coding_emacs_mule must be
2248    accessible, and they are usually called just once (thus doesn't
2249    increase the size of compiled object).  */
2250
2251 /* Decode a composition rule represented by C as a component of
2252    composition sequence of Emacs 20 style.  Set RULE to the decoded
2253    rule. */
2254
2255 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
2256   do {                                                  \
2257     int gref, nref;                                     \
2258                                                         \
2259     c -= 0xA0;                                          \
2260     if (c < 0 || c >= 81)                               \
2261       goto invalid_code;                                \
2262     gref = c / 9, nref = c % 9;                         \
2263     if (gref == 4) gref = 10;                           \
2264     if (nref == 4) nref = 10;                           \
2265     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2266   } while (0)
2267
2268
2269 /* Decode a composition rule represented by C and the following byte
2270    at SRC as a component of composition sequence of Emacs 21 style.
2271    Set RULE to the decoded rule.  */
2272
2273 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
2274   do {                                                  \
2275     int gref, nref;                                     \
2276                                                         \
2277     gref = c - 0x20;                                    \
2278     if (gref < 0 || gref >= 81)                         \
2279       goto invalid_code;                                \
2280     ONE_MORE_BYTE (c);                                  \
2281     nref = c - 0x20;                                    \
2282     if (nref < 0 || nref >= 81)                         \
2283       goto invalid_code;                                \
2284     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2285   } while (0)
2286
2287
2288 /* Start of Emacs 21 style format.  The first three bytes at SRC are
2289    (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is the
2290    byte length of this composition information, CHARS is the number of
2291    characters composed by this composition.  */
2292
2293 #define DECODE_EMACS_MULE_21_COMPOSITION()                              \
2294   do {                                                                  \
2295     enum composition_method method = c - 0xF2;                          \
2296     int *charbuf_base = charbuf;                                        \
2297     int nbytes, nchars;                                                 \
2298                                                                         \
2299     ONE_MORE_BYTE (c);                                                  \
2300     if (c < 0)                                                          \
2301       goto invalid_code;                                                \
2302     nbytes = c - 0xA0;                                                  \
2303     if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4))  \
2304       goto invalid_code;                                                \
2305     ONE_MORE_BYTE (c);                                                  \
2306     nchars = c - 0xA0;                                                  \
2307     if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
2308       goto invalid_code;                                                \
2309     cmp_status->old_form = 0;                                           \
2310     cmp_status->method = method;                                        \
2311     if (method == COMPOSITION_RELATIVE)                                 \
2312       cmp_status->state = COMPOSING_CHAR;                               \
2313     else                                                                \
2314       cmp_status->state = COMPOSING_COMPONENT_CHAR;                     \
2315     cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
2316     cmp_status->nchars = nchars;                                        \
2317     cmp_status->ncomps = nbytes - 4;                                    \
2318     ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
2319   } while (0)
2320
2321
2322 /* Start of Emacs 20 style format for relative composition.  */
2323
2324 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
2325   do {                                                          \
2326     cmp_status->old_form = 1;                                   \
2327     cmp_status->method = COMPOSITION_RELATIVE;                  \
2328     cmp_status->state = COMPOSING_CHAR;                         \
2329     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2330     cmp_status->nchars = cmp_status->ncomps = 0;                \
2331     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2332   } while (0)
2333
2334
2335 /* Start of Emacs 20 style format for rule-base composition.  */
2336
2337 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
2338   do {                                                          \
2339     cmp_status->old_form = 1;                                   \
2340     cmp_status->method = COMPOSITION_WITH_RULE;                 \
2341     cmp_status->state = COMPOSING_CHAR;                         \
2342     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2343     cmp_status->nchars = cmp_status->ncomps = 0;                \
2344     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2345   } while (0)
2346
2347
2348 #define DECODE_EMACS_MULE_COMPOSITION_START()           \
2349   do {                                                  \
2350     const unsigned char *current_src = src;             \
2351                                                         \
2352     ONE_MORE_BYTE (c);                                  \
2353     if (c < 0)                                          \
2354       goto invalid_code;                                \
2355     if (c - 0xF2 >= COMPOSITION_RELATIVE                \
2356         && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
2357       DECODE_EMACS_MULE_21_COMPOSITION ();              \
2358     else if (c < 0xA0)                                  \
2359       goto invalid_code;                                \
2360     else if (c < 0xC0)                                  \
2361       {                                                 \
2362         DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
2363         /* Re-read C as a composition component.  */    \
2364         src = current_src;                              \
2365       }                                                 \
2366     else if (c == 0xFF)                                 \
2367       DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();     \
2368     else                                                \
2369       goto invalid_code;                                \
2370   } while (0)
2371
2372 #define EMACS_MULE_COMPOSITION_END()                            \
2373   do {                                                          \
2374     int idx = - cmp_status->length;                             \
2375                                                                 \
2376     if (cmp_status->old_form)                                   \
2377       charbuf[idx + 2] = cmp_status->nchars;                    \
2378     else if (cmp_status->method > COMPOSITION_RELATIVE)         \
2379       charbuf[idx] = charbuf[idx + 2] - cmp_status->length;     \
2380     cmp_status->state = COMPOSING_NO;                           \
2381   } while (0)
2382
2383
2384 static int
2385 emacs_mule_finish_composition (charbuf, cmp_status)
2386      int *charbuf;
2387      struct composition_status *cmp_status;
2388 {
2389   int idx = - cmp_status->length;
2390   int new_chars;
2391
2392   if (cmp_status->old_form && cmp_status->nchars > 0)
2393     {
2394       charbuf[idx + 2] = cmp_status->nchars;
2395       new_chars = 0;
2396       if (cmp_status->method == COMPOSITION_WITH_RULE
2397           && cmp_status->state == COMPOSING_CHAR)
2398         {
2399           /* The last rule was invalid.  */
2400           int rule = charbuf[-1] + 0xA0;
2401
2402           charbuf[-2] = BYTE8_TO_CHAR (rule);
2403           charbuf[-1] = -1;
2404           new_chars = 1;
2405         }
2406     }
2407   else
2408     {
2409       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2410
2411       if (cmp_status->method == COMPOSITION_WITH_RULE)
2412         {
2413           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2414           charbuf[idx++] = -3;
2415           charbuf[idx++] = 0;
2416           new_chars = 1;
2417         }
2418       else
2419         {
2420           int nchars = charbuf[idx + 1] + 0xA0;
2421           int nbytes = charbuf[idx + 2] + 0xA0;
2422
2423           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2424           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2425           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2426           charbuf[idx++] = -1;
2427           new_chars = 4;
2428         }
2429     }
2430   cmp_status->state = COMPOSING_NO;
2431   return new_chars;
2432 }
2433
2434 #define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
2435   do {                                                                    \
2436     if (cmp_status->state != COMPOSING_NO)                                \
2437       char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
2438   } while (0)
2439
2440
2441 static void
2442 decode_coding_emacs_mule (coding)
2443      struct coding_system *coding;
2444 {
2445   const unsigned char *src = coding->source + coding->consumed;
2446   const unsigned char *src_end = coding->source + coding->src_bytes;
2447   const unsigned char *src_base;
2448   int *charbuf = coding->charbuf + coding->charbuf_used;
2449   /* We may produce two annocations (charset and composition) in one
2450      loop and one more charset annocation at the end.  */
2451   int *charbuf_end
2452     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2453   int consumed_chars = 0, consumed_chars_base;
2454   int multibytep = coding->src_multibyte;
2455   Lisp_Object attrs, charset_list;
2456   int char_offset = coding->produced_char;
2457   int last_offset = char_offset;
2458   int last_id = charset_ascii;
2459   int eol_crlf =
2460     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2461   int byte_after_cr = -1;
2462   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2463
2464   CODING_GET_INFO (coding, attrs, charset_list);
2465
2466   if (cmp_status->state != COMPOSING_NO)
2467     {
2468       int i;
2469
2470       for (i = 0; i < cmp_status->length; i++)
2471         *charbuf++ = cmp_status->carryover[i];
2472       coding->annotated = 1;
2473     }
2474
2475   while (1)
2476     {
2477       int c, id;
2478
2479       src_base = src;
2480       consumed_chars_base = consumed_chars;
2481
2482       if (charbuf >= charbuf_end)
2483         {
2484           if (byte_after_cr >= 0)
2485             src_base--;
2486           break;
2487         }
2488
2489       if (byte_after_cr >= 0)
2490         c = byte_after_cr, byte_after_cr = -1;
2491       else
2492         ONE_MORE_BYTE (c);
2493
2494       if (c < 0 || c == 0x80)
2495         {
2496           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2497           if (c < 0)
2498             {
2499               *charbuf++ = -c;
2500               char_offset++;
2501             }
2502           else
2503             DECODE_EMACS_MULE_COMPOSITION_START ();
2504           continue;
2505         }
2506
2507       if (c < 0x80)
2508         {
2509           if (eol_crlf && c == '\r')
2510             ONE_MORE_BYTE (byte_after_cr);
2511           id = charset_ascii;
2512           if (cmp_status->state != COMPOSING_NO)
2513             {
2514               if (cmp_status->old_form)
2515                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2516               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2517                 cmp_status->ncomps--;
2518             }
2519         }
2520       else
2521         {
2522           int nchars, nbytes;
2523
2524           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2525                                cmp_status);
2526           if (c < 0)
2527             {
2528               if (c == -1)
2529                 goto invalid_code;
2530               if (c == -2)
2531                 break;
2532             }
2533           src = src_base + nbytes;
2534           consumed_chars = consumed_chars_base + nchars;
2535           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2536             cmp_status->ncomps -= nchars;
2537         }
2538
2539       /* Now if C >= 0, we found a normally encoded characer, if C <
2540          0, we found an old-style composition component character or
2541          rule.  */
2542
2543       if (cmp_status->state == COMPOSING_NO)
2544         {
2545           if (last_id != id)
2546             {
2547               if (last_id != charset_ascii)
2548                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2549                                   last_id);
2550               last_id = id;
2551               last_offset = char_offset;
2552             }
2553           *charbuf++ = c;
2554           char_offset++;
2555         }
2556       else if (cmp_status->state == COMPOSING_CHAR)
2557         {
2558           if (cmp_status->old_form)
2559             {
2560               if (c >= 0)
2561                 {
2562                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2563                   *charbuf++ = c;
2564                   char_offset++;
2565                 }
2566               else
2567                 {
2568                   *charbuf++ = -c;
2569                   cmp_status->nchars++;
2570                   cmp_status->length++;
2571                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2572                     EMACS_MULE_COMPOSITION_END ();
2573                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2574                     cmp_status->state = COMPOSING_RULE;
2575                 }
2576             }
2577           else
2578             {
2579               *charbuf++ = c;
2580               cmp_status->length++;
2581               cmp_status->nchars--;
2582               if (cmp_status->nchars == 0)
2583                 EMACS_MULE_COMPOSITION_END ();
2584             }
2585         }
2586       else if (cmp_status->state == COMPOSING_RULE)
2587         {
2588           int rule;
2589
2590           if (c >= 0)
2591             {
2592               EMACS_MULE_COMPOSITION_END ();
2593               *charbuf++ = c;
2594               char_offset++;
2595             }
2596           else
2597             {
2598               c = -c;
2599               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2600               if (rule < 0)
2601                 goto invalid_code;
2602               *charbuf++ = -2;
2603               *charbuf++ = rule;
2604               cmp_status->length += 2;
2605               cmp_status->state = COMPOSING_CHAR;
2606             }
2607         }
2608       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2609         {
2610           *charbuf++ = c;
2611           cmp_status->length++;
2612           if (cmp_status->ncomps == 0)
2613             cmp_status->state = COMPOSING_CHAR;
2614           else if (cmp_status->ncomps > 0)
2615             {
2616               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2617                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2618             }
2619           else
2620             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2621         }
2622       else                      /* COMPOSING_COMPONENT_RULE */
2623         {
2624           int rule;
2625
2626           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2627           if (rule < 0)
2628             goto invalid_code;
2629           *charbuf++ = -2;
2630           *charbuf++ = rule;
2631           cmp_status->length += 2;
2632           cmp_status->ncomps--;
2633           if (cmp_status->ncomps > 0)
2634             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2635           else
2636             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2637         }
2638       continue;
2639
2640     retry:
2641       src = src_base;
2642       consumed_chars = consumed_chars_base;
2643       continue;
2644
2645     invalid_code:
2646       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2647       src = src_base;
2648       consumed_chars = consumed_chars_base;
2649       ONE_MORE_BYTE (c);
2650       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2651       char_offset++;
2652       coding->errors++;
2653     }
2654
2655  no_more_source:
2656   if (cmp_status->state != COMPOSING_NO)
2657     {
2658       if (coding->mode & CODING_MODE_LAST_BLOCK)
2659         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2660       else
2661         {
2662           int i;
2663
2664           charbuf -= cmp_status->length;
2665           for (i = 0; i < cmp_status->length; i++)
2666             cmp_status->carryover[i] = charbuf[i];
2667         }
2668     }
2669   if (last_id != charset_ascii)
2670     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2671   coding->consumed_char += consumed_chars_base;
2672   coding->consumed = src_base - coding->source;
2673   coding->charbuf_used = charbuf - coding->charbuf;
2674 }
2675
2676
2677 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2678   do {                                          \
2679     if (id < 0xA0)                              \
2680       codes[0] = id, codes[1] = 0;              \
2681     else if (id < 0xE0)                         \
2682       codes[0] = 0x9A, codes[1] = id;           \
2683     else if (id < 0xF0)                         \
2684       codes[0] = 0x9B, codes[1] = id;           \
2685     else if (id < 0xF5)                         \
2686       codes[0] = 0x9C, codes[1] = id;           \
2687     else                                        \
2688       codes[0] = 0x9D, codes[1] = id;           \
2689   } while (0);
2690
2691
2692 static int
2693 encode_coding_emacs_mule (coding)
2694      struct coding_system *coding;
2695 {
2696   int multibytep = coding->dst_multibyte;
2697   int *charbuf = coding->charbuf;
2698   int *charbuf_end = charbuf + coding->charbuf_used;
2699   unsigned char *dst = coding->destination + coding->produced;
2700   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2701   int safe_room = 8;
2702   int produced_chars = 0;
2703   Lisp_Object attrs, charset_list;
2704   int c;
2705   int preferred_charset_id = -1;
2706
2707   CODING_GET_INFO (coding, attrs, charset_list);
2708   if (! EQ (charset_list, Vemacs_mule_charset_list))
2709     {
2710       CODING_ATTR_CHARSET_LIST (attrs)
2711         = charset_list = Vemacs_mule_charset_list;
2712     }
2713
2714   while (charbuf < charbuf_end)
2715     {
2716       ASSURE_DESTINATION (safe_room);
2717       c = *charbuf++;
2718
2719       if (c < 0)
2720         {
2721           /* Handle an annotation.  */
2722           switch (*charbuf)
2723             {
2724             case CODING_ANNOTATE_COMPOSITION_MASK:
2725               /* Not yet implemented.  */
2726               break;
2727             case CODING_ANNOTATE_CHARSET_MASK:
2728               preferred_charset_id = charbuf[3];
2729               if (preferred_charset_id >= 0
2730                   && NILP (Fmemq (make_number (preferred_charset_id),
2731                                   charset_list)))
2732                 preferred_charset_id = -1;
2733               break;
2734             default:
2735               abort ();
2736             }
2737           charbuf += -c - 1;
2738           continue;
2739         }
2740
2741       if (ASCII_CHAR_P (c))
2742         EMIT_ONE_ASCII_BYTE (c);
2743       else if (CHAR_BYTE8_P (c))
2744         {
2745           c = CHAR_TO_BYTE8 (c);
2746           EMIT_ONE_BYTE (c);
2747         }
2748       else
2749         {
2750           struct charset *charset;
2751           unsigned code;
2752           int dimension;
2753           int emacs_mule_id;
2754           unsigned char leading_codes[2];
2755
2756           if (preferred_charset_id >= 0)
2757             {
2758               charset = CHARSET_FROM_ID (preferred_charset_id);
2759               if (CHAR_CHARSET_P (c, charset))
2760                 code = ENCODE_CHAR (charset, c);
2761               else
2762                 charset = char_charset (c, charset_list, &code);
2763             }
2764           else
2765             charset = char_charset (c, charset_list, &code);
2766           if (! charset)
2767             {
2768               c = coding->default_char;
2769               if (ASCII_CHAR_P (c))
2770                 {
2771                   EMIT_ONE_ASCII_BYTE (c);
2772                   continue;
2773                 }
2774               charset = char_charset (c, charset_list, &code);
2775             }
2776           dimension = CHARSET_DIMENSION (charset);
2777           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2778           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2779           EMIT_ONE_BYTE (leading_codes[0]);
2780           if (leading_codes[1])
2781             EMIT_ONE_BYTE (leading_codes[1]);
2782           if (dimension == 1)
2783             EMIT_ONE_BYTE (code | 0x80);
2784           else
2785             {
2786               code |= 0x8080;
2787               EMIT_ONE_BYTE (code >> 8);
2788               EMIT_ONE_BYTE (code & 0xFF);
2789             }
2790         }
2791     }
2792   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2793   coding->produced_char += produced_chars;
2794   coding->produced = dst - coding->destination;
2795   return 0;
2796 }
2797
2798 \f
2799 /*** 7. ISO2022 handlers ***/
2800
2801 /* The following note describes the coding system ISO2022 briefly.
2802    Since the intention of this note is to help understand the
2803    functions in this file, some parts are NOT ACCURATE or are OVERLY
2804    SIMPLIFIED.  For thorough understanding, please refer to the
2805    original document of ISO2022.  This is equivalent to the standard
2806    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2807
2808    ISO2022 provides many mechanisms to encode several character sets
2809    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2810    is encoded using bytes less than 128.  This may make the encoded
2811    text a little bit longer, but the text passes more easily through
2812    several types of gateway, some of which strip off the MSB (Most
2813    Significant Bit).
2814
2815    There are two kinds of character sets: control character sets and
2816    graphic character sets.  The former contain control characters such
2817    as `newline' and `escape' to provide control functions (control
2818    functions are also provided by escape sequences).  The latter
2819    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2820    two control character sets and many graphic character sets.
2821
2822    Graphic character sets are classified into one of the following
2823    four classes, according to the number of bytes (DIMENSION) and
2824    number of characters in one dimension (CHARS) of the set:
2825    - DIMENSION1_CHARS94
2826    - DIMENSION1_CHARS96
2827    - DIMENSION2_CHARS94
2828    - DIMENSION2_CHARS96
2829
2830    In addition, each character set is assigned an identification tag,
2831    unique for each set, called the "final character" (denoted as <F>
2832    hereafter).  The <F> of each character set is decided by ECMA(*)
2833    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2834    (0x30..0x3F are for private use only).
2835
2836    Note (*): ECMA = European Computer Manufacturers Association
2837
2838    Here are examples of graphic character sets [NAME(<F>)]:
2839         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2840         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2841         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2842         o DIMENSION2_CHARS96 -- none for the moment
2843
2844    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2845         C0 [0x00..0x1F] -- control character plane 0
2846         GL [0x20..0x7F] -- graphic character plane 0
2847         C1 [0x80..0x9F] -- control character plane 1
2848         GR [0xA0..0xFF] -- graphic character plane 1
2849
2850    A control character set is directly designated and invoked to C0 or
2851    C1 by an escape sequence.  The most common case is that:
2852    - ISO646's  control character set is designated/invoked to C0, and
2853    - ISO6429's control character set is designated/invoked to C1,
2854    and usually these designations/invocations are omitted in encoded
2855    text.  In a 7-bit environment, only C0 can be used, and a control
2856    character for C1 is encoded by an appropriate escape sequence to
2857    fit into the environment.  All control characters for C1 are
2858    defined to have corresponding escape sequences.
2859
2860    A graphic character set is at first designated to one of four
2861    graphic registers (G0 through G3), then these graphic registers are
2862    invoked to GL or GR.  These designations and invocations can be
2863    done independently.  The most common case is that G0 is invoked to
2864    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2865    these invocations and designations are omitted in encoded text.
2866    In a 7-bit environment, only GL can be used.
2867
2868    When a graphic character set of CHARS94 is invoked to GL, codes
2869    0x20 and 0x7F of the GL area work as control characters SPACE and
2870    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2871    be used.
2872
2873    There are two ways of invocation: locking-shift and single-shift.
2874    With locking-shift, the invocation lasts until the next different
2875    invocation, whereas with single-shift, the invocation affects the
2876    following character only and doesn't affect the locking-shift
2877    state.  Invocations are done by the following control characters or
2878    escape sequences:
2879
2880    ----------------------------------------------------------------------
2881    abbrev  function                  cntrl escape seq   description
2882    ----------------------------------------------------------------------
2883    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2884    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2885    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2886    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2887    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2888    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2889    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2890    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2891    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2892    ----------------------------------------------------------------------
2893    (*) These are not used by any known coding system.
2894
2895    Control characters for these functions are defined by macros
2896    ISO_CODE_XXX in `coding.h'.
2897
2898    Designations are done by the following escape sequences:
2899    ----------------------------------------------------------------------
2900    escape sequence      description
2901    ----------------------------------------------------------------------
2902    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2903    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2904    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2905    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2906    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2907    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2908    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2909    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2910    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2911    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2912    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2913    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2914    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2915    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2916    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2917    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2918    ----------------------------------------------------------------------
2919
2920    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2921    of dimension 1, chars 94, and final character <F>, etc...
2922
2923    Note (*): Although these designations are not allowed in ISO2022,
2924    Emacs accepts them on decoding, and produces them on encoding
2925    CHARS96 character sets in a coding system which is characterized as
2926    7-bit environment, non-locking-shift, and non-single-shift.
2927
2928    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2929    '(' must be omitted.  We refer to this as "short-form" hereafter.
2930
2931    Now you may notice that there are a lot of ways of encoding the
2932    same multilingual text in ISO2022.  Actually, there exist many
2933    coding systems such as Compound Text (used in X11's inter client
2934    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2935    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2936    localized platforms), and all of these are variants of ISO2022.
2937
2938    In addition to the above, Emacs handles two more kinds of escape
2939    sequences: ISO6429's direction specification and Emacs' private
2940    sequence for specifying character composition.
2941
2942    ISO6429's direction specification takes the following form:
2943         o CSI ']'      -- end of the current direction
2944         o CSI '0' ']'  -- end of the current direction
2945         o CSI '1' ']'  -- start of left-to-right text
2946         o CSI '2' ']'  -- start of right-to-left text
2947    The control character CSI (0x9B: control sequence introducer) is
2948    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2949
2950    Character composition specification takes the following form:
2951         o ESC '0' -- start relative composition
2952         o ESC '1' -- end composition
2953         o ESC '2' -- start rule-base composition (*)
2954         o ESC '3' -- start relative composition with alternate chars  (**)
2955         o ESC '4' -- start rule-base composition with alternate chars  (**)
2956   Since these are not standard escape sequences of any ISO standard,
2957   the use of them with these meanings is restricted to Emacs only.
2958
2959   (*) This form is used only in Emacs 20.7 and older versions,
2960   but newer versions can safely decode it.
2961   (**) This form is used only in Emacs 21.1 and newer versions,
2962   and older versions can't decode it.
2963
2964   Here's a list of example usages of these composition escape
2965   sequences (categorized by `enum composition_method').
2966
2967   COMPOSITION_RELATIVE:
2968         ESC 0 CHAR [ CHAR ] ESC 1
2969   COMPOSITION_WITH_RULE:
2970         ESC 2 CHAR [ RULE CHAR ] ESC 1
2971   COMPOSITION_WITH_ALTCHARS:
2972         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2973   COMPOSITION_WITH_RULE_ALTCHARS:
2974         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2975
2976 enum iso_code_class_type iso_code_class[256];
2977
2978 #define SAFE_CHARSET_P(coding, id)      \
2979   ((id) <= (coding)->max_charset_id     \
2980    && (coding)->safe_charsets[id] != 255)
2981
2982
2983 #define SHIFT_OUT_OK(category)  \
2984   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2985
2986 static void
2987 setup_iso_safe_charsets (attrs)
2988      Lisp_Object attrs;
2989 {
2990   Lisp_Object charset_list, safe_charsets;
2991   Lisp_Object request;
2992   Lisp_Object reg_usage;
2993   Lisp_Object tail;
2994   int reg94, reg96;
2995   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2996   int max_charset_id;
2997
2998   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2999   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
3000       && ! EQ (charset_list, Viso_2022_charset_list))
3001     {
3002       CODING_ATTR_CHARSET_LIST (attrs)
3003         = charset_list = Viso_2022_charset_list;
3004       ASET (attrs, coding_attr_safe_charsets, Qnil);
3005     }
3006
3007   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3008     return;
3009
3010   max_charset_id = 0;
3011   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3012     {
3013       int id = XINT (XCAR (tail));
3014       if (max_charset_id < id)
3015         max_charset_id = id;
3016     }
3017
3018   safe_charsets = make_uninit_string (max_charset_id + 1);
3019   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
3020   request = AREF (attrs, coding_attr_iso_request);
3021   reg_usage = AREF (attrs, coding_attr_iso_usage);
3022   reg94 = XINT (XCAR (reg_usage));
3023   reg96 = XINT (XCDR (reg_usage));
3024
3025   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3026     {
3027       Lisp_Object id;
3028       Lisp_Object reg;
3029       struct charset *charset;
3030
3031       id = XCAR (tail);
3032       charset = CHARSET_FROM_ID (XINT (id));
3033       reg = Fcdr (Fassq (id, request));
3034       if (! NILP (reg))
3035         SSET (safe_charsets, XINT (id), XINT (reg));
3036       else if (charset->iso_chars_96)
3037         {
3038           if (reg96 < 4)
3039             SSET (safe_charsets, XINT (id), reg96);
3040         }
3041       else
3042         {
3043           if (reg94 < 4)
3044             SSET (safe_charsets, XINT (id), reg94);
3045         }
3046     }
3047   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3048 }
3049
3050
3051 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3052    Check if a text is encoded in one of ISO-2022 based codig systems.
3053    If it is, return 1, else return 0.  */
3054
3055 static int
3056 detect_coding_iso_2022 (coding, detect_info)
3057      struct coding_system *coding;
3058      struct coding_detection_info *detect_info;
3059 {
3060   const unsigned char *src = coding->source, *src_base = src;
3061   const unsigned char *src_end = coding->source + coding->src_bytes;
3062   int multibytep = coding->src_multibyte;
3063   int single_shifting = 0;
3064   int id;
3065   int c, c1;
3066   int consumed_chars = 0;
3067   int i;
3068   int rejected = 0;
3069   int found = 0;
3070   int composition_count = -1;
3071
3072   detect_info->checked |= CATEGORY_MASK_ISO;
3073
3074   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3075     {
3076       struct coding_system *this = &(coding_categories[i]);
3077       Lisp_Object attrs, val;
3078
3079       if (this->id < 0)
3080         continue;
3081       attrs = CODING_ID_ATTRS (this->id);
3082       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3083           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3084         setup_iso_safe_charsets (attrs);
3085       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3086       this->max_charset_id = SCHARS (val) - 1;
3087       this->safe_charsets = SDATA (val);
3088     }
3089
3090   /* A coding system of this category is always ASCII compatible.  */
3091   src += coding->head_ascii;
3092
3093   while (rejected != CATEGORY_MASK_ISO)
3094     {
3095       src_base = src;
3096       ONE_MORE_BYTE (c);
3097       switch (c)
3098         {
3099         case ISO_CODE_ESC:
3100           if (inhibit_iso_escape_detection)
3101             break;
3102           single_shifting = 0;
3103           ONE_MORE_BYTE (c);
3104           if (c >= '(' && c <= '/')
3105             {
3106               /* Designation sequence for a charset of dimension 1.  */
3107               ONE_MORE_BYTE (c1);
3108               if (c1 < ' ' || c1 >= 0x80
3109                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3110                 /* Invalid designation sequence.  Just ignore.  */
3111                 break;
3112             }
3113           else if (c == '$')
3114             {
3115               /* Designation sequence for a charset of dimension 2.  */
3116               ONE_MORE_BYTE (c);
3117               if (c >= '@' && c <= 'B')
3118                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3119                 id = iso_charset_table[1][0][c];
3120               else if (c >= '(' && c <= '/')
3121                 {
3122                   ONE_MORE_BYTE (c1);
3123                   if (c1 < ' ' || c1 >= 0x80
3124                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3125                     /* Invalid designation sequence.  Just ignore.  */
3126                     break;
3127                 }
3128               else
3129                 /* Invalid designation sequence.  Just ignore it.  */
3130                 break;
3131             }
3132           else if (c == 'N' || c == 'O')
3133             {
3134               /* ESC <Fe> for SS2 or SS3.  */
3135               single_shifting = 1;
3136               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3137               break;
3138             }
3139           else if (c == '1')
3140             {
3141               /* End of composition.  */
3142               if (composition_count < 0
3143                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3144                 /* Invalid */
3145                 break;
3146               composition_count = -1;
3147               found |= CATEGORY_MASK_ISO;
3148             }
3149           else if (c >= '0' && c <= '4')
3150             {
3151               /* ESC <Fp> for start/end composition.  */
3152               composition_count = 0;
3153               break;
3154             }
3155           else
3156             {
3157               /* Invalid escape sequence.  Just ignore it.  */
3158               break;
3159             }
3160
3161           /* We found a valid designation sequence for CHARSET.  */
3162           rejected |= CATEGORY_MASK_ISO_8BIT;
3163           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3164                               id))
3165             found |= CATEGORY_MASK_ISO_7;
3166           else
3167             rejected |= CATEGORY_MASK_ISO_7;
3168           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3169                               id))
3170             found |= CATEGORY_MASK_ISO_7_TIGHT;
3171           else
3172             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3173           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3174                               id))
3175             found |= CATEGORY_MASK_ISO_7_ELSE;
3176           else
3177             rejected |= CATEGORY_MASK_ISO_7_ELSE;
3178           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3179                               id))
3180             found |= CATEGORY_MASK_ISO_8_ELSE;
3181           else
3182             rejected |= CATEGORY_MASK_ISO_8_ELSE;
3183           break;
3184
3185         case ISO_CODE_SO:
3186         case ISO_CODE_SI:
3187           /* Locking shift out/in.  */
3188           if (inhibit_iso_escape_detection)
3189             break;
3190           single_shifting = 0;
3191           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3192           break;
3193
3194         case ISO_CODE_CSI:
3195           /* Control sequence introducer.  */
3196           single_shifting = 0;
3197           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3198           found |= CATEGORY_MASK_ISO_8_ELSE;
3199           goto check_extra_latin;
3200
3201         case ISO_CODE_SS2:
3202         case ISO_CODE_SS3:
3203           /* Single shift.   */
3204           if (inhibit_iso_escape_detection)
3205             break;
3206           single_shifting = 0;
3207           rejected |= CATEGORY_MASK_ISO_7BIT;
3208           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3209               & CODING_ISO_FLAG_SINGLE_SHIFT)
3210             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
3211           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3212               & CODING_ISO_FLAG_SINGLE_SHIFT)
3213             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3214           if (single_shifting)
3215             break;
3216           goto check_extra_latin;
3217
3218         default:
3219           if (c < 0)
3220             continue;
3221           if (c < 0x80)
3222             {
3223               if (composition_count >= 0)
3224                 composition_count++;
3225               single_shifting = 0;
3226               break;
3227             }
3228           if (c >= 0xA0)
3229             {
3230               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3231               found |= CATEGORY_MASK_ISO_8_1;
3232               /* Check the length of succeeding codes of the range
3233                  0xA0..0FF.  If the byte length is even, we include
3234                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3235                  only when we are not single shifting.  */
3236               if (! single_shifting
3237                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3238                 {
3239                   int i = 1;
3240                   while (src < src_end)
3241                     {
3242                       ONE_MORE_BYTE (c);
3243                       if (c < 0xA0)
3244                         break;
3245                       i++;
3246                     }
3247
3248                   if (i & 1 && src < src_end)
3249                     {
3250                       rejected |= CATEGORY_MASK_ISO_8_2;
3251                       if (composition_count >= 0)
3252                         composition_count += i;
3253                     }
3254                   else
3255                     {
3256                       found |= CATEGORY_MASK_ISO_8_2;
3257                       if (composition_count >= 0)
3258                         composition_count += i / 2;
3259                     }
3260                 }
3261               break;
3262             }
3263         check_extra_latin:
3264           single_shifting = 0;
3265           if (! VECTORP (Vlatin_extra_code_table)
3266               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3267             {
3268               rejected = CATEGORY_MASK_ISO;
3269               break;
3270             }
3271           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3272               & CODING_ISO_FLAG_LATIN_EXTRA)
3273             found |= CATEGORY_MASK_ISO_8_1;
3274           else
3275             rejected |= CATEGORY_MASK_ISO_8_1;
3276           rejected |= CATEGORY_MASK_ISO_8_2;
3277         }
3278     }
3279   detect_info->rejected |= CATEGORY_MASK_ISO;
3280   return 0;
3281
3282  no_more_source:
3283   detect_info->rejected |= rejected;
3284   detect_info->found |= (found & ~rejected);
3285   return 1;
3286 }
3287
3288
3289 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
3290    escape sequence should be kept.  */
3291 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3292   do {                                                                  \
3293     int id, prev;                                                       \
3294                                                                         \
3295     if (final < '0' || final >= 128                                     \
3296         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
3297         || !SAFE_CHARSET_P (coding, id))                                \
3298       {                                                                 \
3299         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
3300         chars_96 = -1;                                                  \
3301         break;                                                          \
3302       }                                                                 \
3303     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
3304     if (id == charset_jisx0201_roman)                                   \
3305       {                                                                 \
3306         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
3307           id = charset_ascii;                                           \
3308       }                                                                 \
3309     else if (id == charset_jisx0208_1978)                               \
3310       {                                                                 \
3311         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3312           id = charset_jisx0208;                                        \
3313       }                                                                 \
3314     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3315     /* If there was an invalid designation to REG previously, and this  \
3316        designation is ASCII to REG, we should keep this designation     \
3317        sequence.  */                                                    \
3318     if (prev == -2 && id == charset_ascii)                              \
3319       chars_96 = -1;                                                    \
3320   } while (0)
3321
3322
3323 /* Handle these composition sequence (ALT: alternate char):
3324
3325    (1) relative composition: ESC 0 CHAR ... ESC 1
3326    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3327    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3328    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3329
3330    When the start sequence (ESC 0/2/3/4) is found, this annotation
3331    header is produced.
3332
3333         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3334
3335    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3336    produced until the end sequence (ESC 1) is found:
3337
3338    (1) CHAR ... CHAR
3339    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3340    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3341    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3342
3343    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3344    annotation header is updated as below:
3345
3346    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3347    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3348    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3349    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3350
3351    If an error is found while composing, the annotation header is
3352    changed to:
3353
3354         [ ESC '0'/'2'/'3'/'4' -2 0 ]
3355
3356    and the sequence [ -2 DECODED-RULE ] is changed to the original
3357    byte sequence as below:
3358         o the original byte sequence is B: [ B -1 ]
3359         o the original byte sequence is B1 B2: [ B1 B2 ]
3360    and the sequence [ -1 -1 ] is changed to the original byte
3361    sequence:
3362         [ ESC '0' ]
3363 */
3364
3365 /* Decode a composition rule C1 and maybe one more byte from the
3366    source, and set RULE to the encoded composition rule, NBYTES to the
3367    length of the composition rule.  If the rule is invalid, set RULE
3368    to some negative value.  */
3369
3370 #define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
3371   do {                                                                  \
3372     rule = c1 - 32;                                                     \
3373     if (rule < 0)                                                       \
3374       break;                                                            \
3375     if (rule < 81)              /* old format (before ver.21) */        \
3376       {                                                                 \
3377         int gref = (rule) / 9;                                          \
3378         int nref = (rule) % 9;                                          \
3379         if (gref == 4) gref = 10;                                       \
3380         if (nref == 4) nref = 10;                                       \
3381         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
3382         nbytes = 1;                                                     \
3383       }                                                                 \
3384     else                        /* new format (after ver.21) */         \
3385       {                                                                 \
3386         int c;                                                          \
3387                                                                         \
3388         ONE_MORE_BYTE (c);                                              \
3389         rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32);             \
3390         if (rule >= 0)                                                  \
3391           rule += 0x100;   /* to destinguish it from the old format */  \
3392         nbytes = 2;                                                     \
3393       }                                                                 \
3394   } while (0)
3395
3396 #define ENCODE_COMPOSITION_RULE(rule)                           \
3397   do {                                                          \
3398     int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3399                                                                 \
3400     if (rule < 0x100)           /* old format */                \
3401       {                                                         \
3402         if (gref == 10) gref = 4;                               \
3403         if (nref == 10) nref = 4;                               \
3404         charbuf[idx] = 32 + gref * 9 + nref;                    \
3405         charbuf[idx + 1] = -1;                                  \
3406         new_chars++;                                            \
3407       }                                                         \
3408     else                                /* new format */        \
3409       {                                                         \
3410         charbuf[idx] = 32 + 81 + gref;                          \
3411         charbuf[idx + 1] = 32 + nref;                           \
3412         new_chars += 2;                                         \
3413       }                                                         \
3414   } while (0)
3415
3416 /* Finish the current composition as invalid.  */
3417
3418 static int finish_composition P_ ((int *, struct composition_status *));
3419
3420 static int
3421 finish_composition (charbuf, cmp_status)
3422      int *charbuf;
3423      struct composition_status *cmp_status;
3424 {
3425   int idx = - cmp_status->length;
3426   int new_chars;
3427
3428   /* Recover the original ESC sequence */
3429   charbuf[idx++] = ISO_CODE_ESC;
3430   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3431                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3432                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3433                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3434                     : '4');
3435   charbuf[idx++] = -2;
3436   charbuf[idx++] = 0;
3437   charbuf[idx++] = -1;
3438   new_chars = cmp_status->nchars;
3439   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3440     for (; idx < 0; idx++)
3441       {
3442         int elt = charbuf[idx];
3443
3444         if (elt == -2)
3445           {
3446             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3447             idx++;
3448           }
3449         else if (elt == -1)
3450           {
3451             charbuf[idx++] = ISO_CODE_ESC;
3452             charbuf[idx] = '0';
3453             new_chars += 2;
3454           }
3455       }
3456   cmp_status->state = COMPOSING_NO;
3457   return new_chars;
3458 }
3459
3460 /* If characers are under composition, finish the composition.  */
3461 #define MAYBE_FINISH_COMPOSITION()                              \
3462   do {                                                          \
3463     if (cmp_status->state != COMPOSING_NO)                      \
3464       char_offset += finish_composition (charbuf, cmp_status);  \
3465   } while (0)
3466
3467 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3468
3469    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3470    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3471    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3472    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3473
3474    Produce this annotation sequence now:
3475
3476    [ -LENGTH(==-4) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) METHOD ]
3477 */
3478
3479 #define DECODE_COMPOSITION_START(c1)                                       \
3480   do {                                                                     \
3481     if (c1 == '0'                                                          \
3482         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
3483              && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
3484             || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
3485                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3486       {                                                                    \
3487         *charbuf++ = -1;                                                   \
3488         *charbuf++= -1;                                                    \
3489         cmp_status->state = COMPOSING_CHAR;                                \
3490         cmp_status->length += 2;                                           \
3491       }                                                                    \
3492     else                                                                   \
3493       {                                                                    \
3494         MAYBE_FINISH_COMPOSITION ();                                       \
3495         cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE             \
3496                               : c1 == '2' ? COMPOSITION_WITH_RULE          \
3497                               : c1 == '3' ? COMPOSITION_WITH_ALTCHARS      \
3498                               : COMPOSITION_WITH_RULE_ALTCHARS);           \
3499         cmp_status->state                                                  \
3500           = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);       \
3501         ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
3502         cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
3503         cmp_status->nchars = cmp_status->ncomps = 0;                       \
3504         coding->annotated = 1;                                             \
3505       }                                                                    \
3506   } while (0)
3507
3508
3509 /* Handle composition end sequence ESC 1.  */
3510
3511 #define DECODE_COMPOSITION_END()                                        \
3512   do {                                                                  \
3513     if (cmp_status->nchars == 0                                         \
3514         || ((cmp_status->state == COMPOSING_CHAR)                       \
3515             == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
3516       {                                                                 \
3517         MAYBE_FINISH_COMPOSITION ();                                    \
3518         goto invalid_code;                                              \
3519       }                                                                 \
3520     if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
3521       charbuf[- cmp_status->length] -= cmp_status->ncomps + 2;          \
3522     else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
3523       charbuf[- cmp_status->length] -= cmp_status->ncomps * 3;          \
3524     charbuf[- cmp_status->length + 2] = cmp_status->nchars;             \
3525     char_offset += cmp_status->nchars;                                  \
3526     cmp_status->state = COMPOSING_NO;                                   \
3527   } while (0)
3528
3529 /* Store a composition rule RULE in charbuf, and update cmp_status.  */
3530
3531 #define STORE_COMPOSITION_RULE(rule)    \
3532   do {                                  \
3533     *charbuf++ = -2;                    \
3534     *charbuf++ = rule;                  \
3535     cmp_status->length += 2;            \
3536     cmp_status->state--;                \
3537   } while (0)
3538
3539 /* Store a composed char or a component char C in charbuf, and update
3540    cmp_status.  */
3541
3542 #define STORE_COMPOSITION_CHAR(c)                                       \
3543   do {                                                                  \
3544     *charbuf++ = (c);                                                   \
3545     cmp_status->length++;                                               \
3546     if (cmp_status->state == COMPOSING_CHAR)                            \
3547       cmp_status->nchars++;                                             \
3548     else                                                                \
3549       cmp_status->ncomps++;                                             \
3550     if (cmp_status->method == COMPOSITION_WITH_RULE                     \
3551         || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS        \
3552             && cmp_status->state == COMPOSING_COMPONENT_CHAR))          \
3553       cmp_status->state++;                                              \
3554   } while (0)
3555
3556
3557 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3558
3559 static void
3560 decode_coding_iso_2022 (coding)
3561      struct coding_system *coding;
3562 {
3563   const unsigned char *src = coding->source + coding->consumed;
3564   const unsigned char *src_end = coding->source + coding->src_bytes;
3565   const unsigned char *src_base;
3566   int *charbuf = coding->charbuf + coding->charbuf_used;
3567   /* We may produce two annocations (charset and composition) in one
3568      loop and one more charset annocation at the end.  */
3569   int *charbuf_end
3570     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3571   int consumed_chars = 0, consumed_chars_base;
3572   int multibytep = coding->src_multibyte;
3573   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3574   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3575   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3576   int charset_id_2, charset_id_3;
3577   struct charset *charset;
3578   int c;
3579   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3580   Lisp_Object attrs, charset_list;
3581   int char_offset = coding->produced_char;
3582   int last_offset = char_offset;
3583   int last_id = charset_ascii;
3584   int eol_crlf =
3585     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3586   int byte_after_cr = -1;
3587   int i;
3588
3589   CODING_GET_INFO (coding, attrs, charset_list);
3590   setup_iso_safe_charsets (attrs);
3591   /* Charset list may have been changed.  */
3592   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3593   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3594
3595   if (cmp_status->state != COMPOSING_NO)
3596     {
3597       for (i = 0; i < cmp_status->length; i++)
3598         *charbuf++ = cmp_status->carryover[i];
3599       coding->annotated = 1;
3600     }
3601
3602   while (1)
3603     {
3604       int c1, c2, c3;
3605
3606       src_base = src;
3607       consumed_chars_base = consumed_chars;
3608
3609       if (charbuf >= charbuf_end)
3610         {
3611           if (byte_after_cr >= 0)
3612             src_base--;
3613           break;
3614         }
3615
3616       if (byte_after_cr >= 0)
3617         c1 = byte_after_cr, byte_after_cr = -1;
3618       else
3619         ONE_MORE_BYTE (c1);
3620       if (c1 < 0)
3621         goto invalid_code;
3622
3623       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3624         {
3625           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3626           char_offset++;
3627           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3628           continue;
3629         }
3630
3631       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3632         {
3633           if (c1 == ISO_CODE_ESC)
3634             {
3635               if (src + 1 >= src_end)
3636                 goto no_more_source;
3637               *charbuf++ = ISO_CODE_ESC;
3638               char_offset++;
3639               if (src[0] == '%' && src[1] == '@')
3640                 {
3641                   src += 2;
3642                   consumed_chars += 2;
3643                   char_offset += 2;
3644                   /* We are sure charbuf can contain two more chars. */
3645                   *charbuf++ = '%';
3646                   *charbuf++ = '@';
3647                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3648                 }
3649             }
3650           else
3651             {
3652               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3653               char_offset++;
3654             }
3655           continue;
3656         }
3657
3658       if ((cmp_status->state == COMPOSING_RULE
3659            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3660           && c1 != ISO_CODE_ESC)
3661         {
3662           int rule, nbytes;
3663
3664           DECODE_COMPOSITION_RULE (rule, nbytes);
3665           if (rule < 0)
3666             goto invalid_code;
3667           STORE_COMPOSITION_RULE (rule);
3668           continue;
3669         }
3670
3671       /* We produce at most one character.  */
3672       switch (iso_code_class [c1])
3673         {
3674         case ISO_0x20_or_0x7F:
3675           if (charset_id_0 < 0
3676               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3677             /* This is SPACE or DEL.  */
3678             charset = CHARSET_FROM_ID (charset_ascii);
3679           else
3680             charset = CHARSET_FROM_ID (charset_id_0);
3681           break;
3682
3683         case ISO_graphic_plane_0:
3684           if (charset_id_0 < 0)
3685             charset = CHARSET_FROM_ID (charset_ascii);
3686           else
3687             charset = CHARSET_FROM_ID (charset_id_0);
3688           break;
3689
3690         case ISO_0xA0_or_0xFF:
3691           if (charset_id_1 < 0
3692               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3693               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3694             goto invalid_code;
3695           /* This is a graphic character, we fall down ... */
3696
3697         case ISO_graphic_plane_1:
3698           if (charset_id_1 < 0)
3699             goto invalid_code;
3700           charset = CHARSET_FROM_ID (charset_id_1);
3701           break;
3702
3703         case ISO_control_0:
3704           if (eol_crlf && c1 == '\r')
3705             ONE_MORE_BYTE (byte_after_cr);
3706           MAYBE_FINISH_COMPOSITION ();
3707           charset = CHARSET_FROM_ID (charset_ascii);
3708           break;
3709
3710         case ISO_control_1:
3711           goto invalid_code;
3712
3713         case ISO_shift_out:
3714           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3715               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3716             goto invalid_code;
3717           CODING_ISO_INVOCATION (coding, 0) = 1;
3718           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3719           continue;
3720
3721         case ISO_shift_in:
3722           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3723             goto invalid_code;
3724           CODING_ISO_INVOCATION (coding, 0) = 0;
3725           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3726           continue;
3727
3728         case ISO_single_shift_2_7:
3729         case ISO_single_shift_2:
3730           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3731             goto invalid_code;
3732           /* SS2 is handled as an escape sequence of ESC 'N' */
3733           c1 = 'N';
3734           goto label_escape_sequence;
3735
3736         case ISO_single_shift_3:
3737           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3738             goto invalid_code;
3739           /* SS2 is handled as an escape sequence of ESC 'O' */
3740           c1 = 'O';
3741           goto label_escape_sequence;
3742
3743         case ISO_control_sequence_introducer:
3744           /* CSI is handled as an escape sequence of ESC '[' ...  */
3745           c1 = '[';
3746           goto label_escape_sequence;
3747
3748         case ISO_escape:
3749           ONE_MORE_BYTE (c1);
3750         label_escape_sequence:
3751           /* Escape sequences handled here are invocation,
3752              designation, direction specification, and character
3753              composition specification.  */
3754           switch (c1)
3755             {
3756             case '&':           /* revision of following character set */
3757               ONE_MORE_BYTE (c1);
3758               if (!(c1 >= '@' && c1 <= '~'))
3759                 goto invalid_code;
3760               ONE_MORE_BYTE (c1);
3761               if (c1 != ISO_CODE_ESC)
3762                 goto invalid_code;
3763               ONE_MORE_BYTE (c1);
3764               goto label_escape_sequence;
3765
3766             case '$':           /* designation of 2-byte character set */
3767               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3768                 goto invalid_code;
3769               {
3770                 int reg, chars96;
3771
3772                 ONE_MORE_BYTE (c1);
3773                 if (c1 >= '@' && c1 <= 'B')
3774                   {     /* designation of JISX0208.1978, GB2312.1980,
3775                            or JISX0208.1980 */
3776                     reg = 0, chars96 = 0;
3777                   }
3778                 else if (c1 >= 0x28 && c1 <= 0x2B)
3779                   { /* designation of DIMENSION2_CHARS94 character set */
3780                     reg = c1 - 0x28, chars96 = 0;
3781                     ONE_MORE_BYTE (c1);
3782                   }
3783                 else if (c1 >= 0x2C && c1 <= 0x2F)
3784                   { /* designation of DIMENSION2_CHARS96 character set */
3785                     reg = c1 - 0x2C, chars96 = 1;
3786                     ONE_MORE_BYTE (c1);
3787                   }
3788                 else
3789                   goto invalid_code;
3790                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3791                 /* We must update these variables now.  */
3792                 if (reg == 0)
3793                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3794                 else if (reg == 1)
3795                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3796                 if (chars96 < 0)
3797                   goto invalid_code;
3798               }
3799               continue;
3800
3801             case 'n':           /* invocation of locking-shift-2 */
3802               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3803                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3804                 goto invalid_code;
3805               CODING_ISO_INVOCATION (coding, 0) = 2;
3806               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3807               continue;
3808
3809             case 'o':           /* invocation of locking-shift-3 */
3810               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3811                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3812                 goto invalid_code;
3813               CODING_ISO_INVOCATION (coding, 0) = 3;
3814               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3815               continue;
3816
3817             case 'N':           /* invocation of single-shift-2 */
3818               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3819                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3820                 goto invalid_code;
3821               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3822               if (charset_id_2 < 0)
3823                 charset = CHARSET_FROM_ID (charset_ascii);
3824               else
3825                 charset = CHARSET_FROM_ID (charset_id_2);
3826               ONE_MORE_BYTE (c1);
3827               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3828                 goto invalid_code;
3829               break;
3830
3831             case 'O':           /* invocation of single-shift-3 */
3832               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3833                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3834                 goto invalid_code;
3835               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3836               if (charset_id_3 < 0)
3837                 charset = CHARSET_FROM_ID (charset_ascii);
3838               else
3839                 charset = CHARSET_FROM_ID (charset_id_3);
3840               ONE_MORE_BYTE (c1);
3841               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3842                 goto invalid_code;
3843               break;
3844
3845             case '0': case '2': case '3': case '4': /* start composition */
3846               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3847                 goto invalid_code;
3848               if (last_id != charset_ascii)
3849                 {
3850                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3851                   last_id = charset_ascii;
3852                   last_offset = char_offset;
3853                 }
3854               DECODE_COMPOSITION_START (c1);
3855               continue;
3856
3857             case '1':           /* end composition */
3858               if (cmp_status->state == COMPOSING_NO)
3859                 goto invalid_code;
3860               DECODE_COMPOSITION_END ();
3861               continue;
3862
3863             case '[':           /* specification of direction */
3864               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3865                 goto invalid_code;
3866               /* For the moment, nested direction is not supported.
3867                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3868                  left-to-right, and nozero means right-to-left.  */
3869               ONE_MORE_BYTE (c1);
3870               switch (c1)
3871                 {
3872                 case ']':       /* end of the current direction */
3873                   coding->mode &= ~CODING_MODE_DIRECTION;
3874
3875                 case '0':       /* end of the current direction */
3876                 case '1':       /* start of left-to-right direction */
3877                   ONE_MORE_BYTE (c1);
3878                   if (c1 == ']')
3879                     coding->mode &= ~CODING_MODE_DIRECTION;
3880                   else
3881                     goto invalid_code;
3882                   break;
3883
3884                 case '2':       /* start of right-to-left direction */
3885                   ONE_MORE_BYTE (c1);
3886                   if (c1 == ']')
3887                     coding->mode |= CODING_MODE_DIRECTION;
3888                   else
3889                     goto invalid_code;
3890                   break;
3891
3892                 default:
3893                   goto invalid_code;
3894                 }
3895               continue;
3896
3897             case '%':
3898               ONE_MORE_BYTE (c1);
3899               if (c1 == '/')
3900                 {
3901                   /* CTEXT extended segment:
3902                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3903                      We keep these bytes as is for the moment.
3904                      They may be decoded by post-read-conversion.  */
3905                   int dim, M, L;
3906                   int size;
3907
3908                   ONE_MORE_BYTE (dim);
3909                   if (dim < 0 || dim > 4)
3910                     goto invalid_code;
3911                   ONE_MORE_BYTE (M);
3912                   if (M < 128)
3913                     goto invalid_code;
3914                   ONE_MORE_BYTE (L);
3915                   if (L < 128)
3916                     goto invalid_code;
3917                   size = ((M - 128) * 128) + (L - 128);
3918                   if (charbuf + 6 > charbuf_end)
3919                     goto break_loop;
3920                   *charbuf++ = ISO_CODE_ESC;
3921                   *charbuf++ = '%';
3922                   *charbuf++ = '/';
3923                   *charbuf++ = dim;
3924                   *charbuf++ = BYTE8_TO_CHAR (M);
3925                   *charbuf++ = BYTE8_TO_CHAR (L);
3926                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3927                 }
3928               else if (c1 == 'G')
3929                 {
3930                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3931                      ESC % G --UTF-8-BYTES-- ESC % @
3932                      We keep these bytes as is for the moment.
3933                      They may be decoded by post-read-conversion.  */
3934                   if (charbuf + 3 > charbuf_end)
3935                     goto break_loop;
3936                   *charbuf++ = ISO_CODE_ESC;
3937                   *charbuf++ = '%';
3938                   *charbuf++ = 'G';
3939                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3940                 }
3941               else
3942                 goto invalid_code;
3943               continue;
3944               break;
3945
3946             default:
3947               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3948                 goto invalid_code;
3949               {
3950                 int reg, chars96;
3951
3952                 if (c1 >= 0x28 && c1 <= 0x2B)
3953                   { /* designation of DIMENSION1_CHARS94 character set */
3954                     reg = c1 - 0x28, chars96 = 0;
3955                     ONE_MORE_BYTE (c1);
3956                   }
3957                 else if (c1 >= 0x2C && c1 <= 0x2F)
3958                   { /* designation of DIMENSION1_CHARS96 character set */
3959                     reg = c1 - 0x2C, chars96 = 1;
3960                     ONE_MORE_BYTE (c1);
3961                   }
3962                 else
3963                   goto invalid_code;
3964                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3965                 /* We must update these variables now.  */
3966                 if (reg == 0)
3967                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3968                 else if (reg == 1)
3969                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3970                 if (chars96 < 0)
3971                   goto invalid_code;
3972               }
3973               continue;
3974             }
3975         }
3976
3977       if (cmp_status->state == COMPOSING_NO
3978           && charset->id != charset_ascii
3979           && last_id != charset->id)
3980         {
3981           if (last_id != charset_ascii)
3982             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3983           last_id = charset->id;
3984           last_offset = char_offset;
3985         }
3986
3987       /* Now we know CHARSET and 1st position code C1 of a character.
3988          Produce a decoded character while getting 2nd and 3rd
3989          position codes C2, C3 if necessary.  */
3990       if (CHARSET_DIMENSION (charset) > 1)
3991         {
3992           ONE_MORE_BYTE (c2);
3993           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3994               || ((c1 & 0x80) != (c2 & 0x80)))
3995             /* C2 is not in a valid range.  */
3996             goto invalid_code;
3997           if (CHARSET_DIMENSION (charset) == 2)
3998             c1 = (c1 << 8) | c2;
3999           else
4000             {
4001               ONE_MORE_BYTE (c3);
4002               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
4003                   || ((c1 & 0x80) != (c3 & 0x80)))
4004                 /* C3 is not in a valid range.  */
4005                 goto invalid_code;
4006               c1 = (c1 << 16) | (c2 << 8) | c2;
4007             }
4008         }
4009       c1 &= 0x7F7F7F;
4010       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4011       if (c < 0)
4012         {
4013           MAYBE_FINISH_COMPOSITION ();
4014           for (; src_base < src; src_base++, char_offset++)
4015             {
4016               if (ASCII_BYTE_P (*src_base))
4017                 *charbuf++ = *src_base;
4018               else
4019                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4020             }
4021         }
4022       else if (cmp_status->state == COMPOSING_NO)
4023         {
4024           *charbuf++ = c;
4025           char_offset++;
4026         }
4027       else if ((cmp_status->state == COMPOSING_CHAR
4028                 ? cmp_status->nchars
4029                 : cmp_status->ncomps)
4030                >= MAX_COMPOSITION_COMPONENTS)
4031         {
4032           /* Too long composition.  */
4033           MAYBE_FINISH_COMPOSITION ();
4034           *charbuf++ = c;
4035           char_offset++;
4036         }
4037       else
4038         STORE_COMPOSITION_CHAR (c);
4039       continue;
4040
4041     invalid_code:
4042       MAYBE_FINISH_COMPOSITION ();
4043       src = src_base;
4044       consumed_chars = consumed_chars_base;
4045       ONE_MORE_BYTE (c);
4046       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4047       char_offset++;
4048       coding->errors++;
4049       continue;
4050
4051     break_loop:
4052       break;
4053     }
4054
4055  no_more_source:
4056   if (cmp_status->state != COMPOSING_NO)
4057     {
4058       if (coding->mode & CODING_MODE_LAST_BLOCK)
4059         MAYBE_FINISH_COMPOSITION ();
4060       else
4061         {
4062           charbuf -= cmp_status->length;
4063           for (i = 0; i < cmp_status->length; i++)
4064             cmp_status->carryover[i] = charbuf[i];
4065         }
4066     }
4067   else if (last_id != charset_ascii)
4068     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4069   coding->consumed_char += consumed_chars_base;
4070   coding->consumed = src_base - coding->source;
4071   coding->charbuf_used = charbuf - coding->charbuf;
4072 }
4073
4074
4075 /* ISO2022 encoding stuff.  */
4076
4077 /*
4078    It is not enough to say just "ISO2022" on encoding, we have to
4079    specify more details.  In Emacs, each coding system of ISO2022
4080    variant has the following specifications:
4081         1. Initial designation to G0 thru G3.
4082         2. Allows short-form designation?
4083         3. ASCII should be designated to G0 before control characters?
4084         4. ASCII should be designated to G0 at end of line?
4085         5. 7-bit environment or 8-bit environment?
4086         6. Use locking-shift?
4087         7. Use Single-shift?
4088    And the following two are only for Japanese:
4089         8. Use ASCII in place of JIS0201-1976-Roman?
4090         9. Use JISX0208-1983 in place of JISX0208-1978?
4091    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4092    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4093    details.
4094 */
4095
4096 /* Produce codes (escape sequence) for designating CHARSET to graphic
4097    register REG at DST, and increment DST.  If <final-char> of CHARSET is
4098    '@', 'A', or 'B' and the coding system CODING allows, produce
4099    designation sequence of short-form.  */
4100
4101 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
4102   do {                                                                  \
4103     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
4104     char *intermediate_char_94 = "()*+";                                \
4105     char *intermediate_char_96 = ",-./";                                \
4106     int revision = -1;                                                  \
4107     int c;                                                              \
4108                                                                         \
4109     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
4110       revision = CHARSET_ISO_REVISION (charset);                        \
4111                                                                         \
4112     if (revision >= 0)                                                  \
4113       {                                                                 \
4114         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
4115         EMIT_ONE_BYTE ('@' + revision);                                 \
4116       }                                                                 \
4117     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
4118     if (CHARSET_DIMENSION (charset) == 1)                               \
4119       {                                                                 \
4120         if (! CHARSET_ISO_CHARS_96 (charset))                           \
4121           c = intermediate_char_94[reg];                                \
4122         else                                                            \
4123           c = intermediate_char_96[reg];                                \
4124         EMIT_ONE_ASCII_BYTE (c);                                        \
4125       }                                                                 \
4126     else                                                                \
4127       {                                                                 \
4128         EMIT_ONE_ASCII_BYTE ('$');                                      \
4129         if (! CHARSET_ISO_CHARS_96 (charset))                           \
4130           {                                                             \
4131             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
4132                 || reg != 0                                             \
4133                 || final_char < '@' || final_char > 'B')                \
4134               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
4135           }                                                             \
4136         else                                                            \
4137           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
4138       }                                                                 \
4139     EMIT_ONE_ASCII_BYTE (final_char);                                   \
4140                                                                         \
4141     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
4142   } while (0)
4143
4144
4145 /* The following two macros produce codes (control character or escape
4146    sequence) for ISO2022 single-shift functions (single-shift-2 and
4147    single-shift-3).  */
4148
4149 #define ENCODE_SINGLE_SHIFT_2                                           \
4150   do {                                                                  \
4151     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
4152       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
4153     else                                                                \
4154       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
4155     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
4156   } while (0)
4157
4158
4159 #define ENCODE_SINGLE_SHIFT_3                                           \
4160   do {                                                                  \
4161     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
4162       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
4163     else                                                                \
4164       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
4165     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
4166   } while (0)
4167
4168
4169 /* The following four macros produce codes (control character or
4170    escape sequence) for ISO2022 locking-shift functions (shift-in,
4171    shift-out, locking-shift-2, and locking-shift-3).  */
4172
4173 #define ENCODE_SHIFT_IN                                 \
4174   do {                                                  \
4175     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
4176     CODING_ISO_INVOCATION (coding, 0) = 0;              \
4177   } while (0)
4178
4179
4180 #define ENCODE_SHIFT_OUT                                \
4181   do {                                                  \
4182     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
4183     CODING_ISO_INVOCATION (coding, 0) = 1;              \
4184   } while (0)
4185
4186
4187 #define ENCODE_LOCKING_SHIFT_2                          \
4188   do {                                                  \
4189     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
4190     CODING_ISO_INVOCATION (coding, 0) = 2;              \
4191   } while (0)
4192
4193
4194 #define ENCODE_LOCKING_SHIFT_3                          \
4195   do {                                                  \
4196     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
4197     CODING_ISO_INVOCATION (coding, 0) = 3;              \
4198   } while (0)
4199
4200
4201 /* Produce codes for a DIMENSION1 character whose character set is
4202    CHARSET and whose position-code is C1.  Designation and invocation
4203    sequences are also produced in advance if necessary.  */
4204
4205 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
4206   do {                                                                  \
4207     int id = CHARSET_ID (charset);                                      \
4208                                                                         \
4209     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
4210         && id == charset_ascii)                                         \
4211       {                                                                 \
4212         id = charset_jisx0201_roman;                                    \
4213         charset = CHARSET_FROM_ID (id);                                 \
4214       }                                                                 \
4215                                                                         \
4216     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
4217       {                                                                 \
4218         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
4219           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
4220         else                                                            \
4221           EMIT_ONE_BYTE (c1 | 0x80);                                    \
4222         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
4223         break;                                                          \
4224       }                                                                 \
4225     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
4226       {                                                                 \
4227         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
4228         break;                                                          \
4229       }                                                                 \
4230     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
4231       {                                                                 \
4232         EMIT_ONE_BYTE (c1 | 0x80);                                      \
4233         break;                                                          \
4234       }                                                                 \
4235     else                                                                \
4236       /* Since CHARSET is not yet invoked to any graphic planes, we     \
4237          must invoke it, or, at first, designate it to some graphic     \
4238          register.  Then repeat the loop to actually produce the        \
4239          character.  */                                                 \
4240       dst = encode_invocation_designation (charset, coding, dst,        \
4241                                            &produced_chars);            \
4242   } while (1)
4243
4244
4245 /* Produce codes for a DIMENSION2 character whose character set is
4246    CHARSET and whose position-codes are C1 and C2.  Designation and
4247    invocation codes are also produced in advance if necessary.  */
4248
4249 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
4250   do {                                                                  \
4251     int id = CHARSET_ID (charset);                                      \
4252                                                                         \
4253     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
4254         && id == charset_jisx0208)                                      \
4255       {                                                                 \
4256         id = charset_jisx0208_1978;                                     \
4257         charset = CHARSET_FROM_ID (id);                                 \
4258       }                                                                 \
4259                                                                         \
4260     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
4261       {                                                                 \
4262         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
4263           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
4264         else                                                            \
4265           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
4266         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
4267         break;                                                          \
4268       }                                                                 \
4269     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
4270       {                                                                 \
4271         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
4272         break;                                                          \
4273       }                                                                 \
4274     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
4275       {                                                                 \
4276         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
4277         break;                                                          \
4278       }                                                                 \
4279     else                                                                \
4280       /* Since CHARSET is not yet invoked to any graphic planes, we     \
4281          must invoke it, or, at first, designate it to some graphic     \
4282          register.  Then repeat the loop to actually produce the        \
4283          character.  */                                                 \
4284       dst = encode_invocation_designation (charset, coding, dst,        \
4285                                            &produced_chars);            \
4286   } while (1)
4287
4288
4289 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
4290   do {                                                                     \
4291     int code = ENCODE_CHAR ((charset),(c));                                \
4292                                                                            \
4293     if (CHARSET_DIMENSION (charset) == 1)                                  \
4294       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
4295     else                                                                   \
4296       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
4297   } while (0)
4298
4299
4300 /* Produce designation and invocation codes at a place pointed by DST
4301    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4302    Return new DST.  */
4303
4304 unsigned char *
4305 encode_invocation_designation (charset, coding, dst, p_nchars)
4306      struct charset *charset;
4307      struct coding_system *coding;
4308      unsigned char *dst;
4309      int *p_nchars;
4310 {
4311   int multibytep = coding->dst_multibyte;
4312   int produced_chars = *p_nchars;
4313   int reg;                      /* graphic register number */
4314   int id = CHARSET_ID (charset);
4315
4316   /* At first, check designations.  */
4317   for (reg = 0; reg < 4; reg++)
4318     if (id == CODING_ISO_DESIGNATION (coding, reg))
4319       break;
4320
4321   if (reg >= 4)
4322     {
4323       /* CHARSET is not yet designated to any graphic registers.  */
4324       /* At first check the requested designation.  */
4325       reg = CODING_ISO_REQUEST (coding, id);
4326       if (reg < 0)
4327         /* Since CHARSET requests no special designation, designate it
4328            to graphic register 0.  */
4329         reg = 0;
4330
4331       ENCODE_DESIGNATION (charset, reg, coding);
4332     }
4333
4334   if (CODING_ISO_INVOCATION (coding, 0) != reg
4335       && CODING_ISO_INVOCATION (coding, 1) != reg)
4336     {
4337       /* Since the graphic register REG is not invoked to any graphic
4338          planes, invoke it to graphic plane 0.  */
4339       switch (reg)
4340         {
4341         case 0:                 /* graphic register 0 */
4342           ENCODE_SHIFT_IN;
4343           break;
4344
4345         case 1:                 /* graphic register 1 */
4346           ENCODE_SHIFT_OUT;
4347           break;
4348
4349         case 2:                 /* graphic register 2 */
4350           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4351             ENCODE_SINGLE_SHIFT_2;
4352           else
4353             ENCODE_LOCKING_SHIFT_2;
4354           break;
4355
4356         case 3:                 /* graphic register 3 */
4357           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4358             ENCODE_SINGLE_SHIFT_3;
4359           else
4360             ENCODE_LOCKING_SHIFT_3;
4361           break;
4362         }
4363     }
4364
4365   *p_nchars = produced_chars;
4366   return dst;
4367 }
4368
4369 /* The following three macros produce codes for indicating direction
4370    of text.  */
4371 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
4372   do {                                                                  \
4373     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
4374       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
4375     else                                                                \
4376       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
4377   } while (0)
4378
4379
4380 #define ENCODE_DIRECTION_R2L()                  \
4381   do {                                          \
4382     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
4383     EMIT_TWO_ASCII_BYTES ('2', ']');            \
4384   } while (0)
4385
4386
4387 #define ENCODE_DIRECTION_L2R()                  \
4388   do {                                          \
4389     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
4390     EMIT_TWO_ASCII_BYTES ('0', ']');            \
4391   } while (0)
4392
4393
4394 /* Produce codes for designation and invocation to reset the graphic
4395    planes and registers to initial state.  */
4396 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
4397   do {                                                                  \
4398     int reg;                                                            \
4399     struct charset *charset;                                            \
4400                                                                         \
4401     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
4402       ENCODE_SHIFT_IN;                                                  \
4403     for (reg = 0; reg < 4; reg++)                                       \
4404       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
4405           && (CODING_ISO_DESIGNATION (coding, reg)                      \
4406               != CODING_ISO_INITIAL (coding, reg)))                     \
4407         {                                                               \
4408           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
4409           ENCODE_DESIGNATION (charset, reg, coding);                    \
4410         }                                                               \
4411   } while (0)
4412
4413
4414 /* Produce designation sequences of charsets in the line started from
4415    SRC to a place pointed by DST, and return updated DST.
4416
4417    If the current block ends before any end-of-line, we may fail to
4418    find all the necessary designations.  */
4419
4420 static unsigned char *
4421 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
4422      struct coding_system *coding;
4423      int *charbuf, *charbuf_end;
4424      unsigned char *dst;
4425 {
4426   struct charset *charset;
4427   /* Table of charsets to be designated to each graphic register.  */
4428   int r[4];
4429   int c, found = 0, reg;
4430   int produced_chars = 0;
4431   int multibytep = coding->dst_multibyte;
4432   Lisp_Object attrs;
4433   Lisp_Object charset_list;
4434
4435   attrs = CODING_ID_ATTRS (coding->id);
4436   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4437   if (EQ (charset_list, Qiso_2022))
4438     charset_list = Viso_2022_charset_list;
4439
4440   for (reg = 0; reg < 4; reg++)
4441     r[reg] = -1;
4442
4443   while (found < 4)
4444     {
4445       int id;
4446
4447       c = *charbuf++;
4448       if (c == '\n')
4449         break;
4450       charset = char_charset (c, charset_list, NULL);
4451       id = CHARSET_ID (charset);
4452       reg = CODING_ISO_REQUEST (coding, id);
4453       if (reg >= 0 && r[reg] < 0)
4454         {
4455           found++;
4456           r[reg] = id;
4457         }
4458     }
4459
4460   if (found)
4461     {
4462       for (reg = 0; reg < 4; reg++)
4463         if (r[reg] >= 0
4464             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4465           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4466     }
4467
4468   return dst;
4469 }
4470
4471 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4472
4473 static int
4474 encode_coding_iso_2022 (coding)
4475      struct coding_system *coding;
4476 {
4477   int multibytep = coding->dst_multibyte;
4478   int *charbuf = coding->charbuf;
4479   int *charbuf_end = charbuf + coding->charbuf_used;
4480   unsigned char *dst = coding->destination + coding->produced;
4481   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4482   int safe_room = 16;
4483   int bol_designation
4484     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4485        && CODING_ISO_BOL (coding));
4486   int produced_chars = 0;
4487   Lisp_Object attrs, eol_type, charset_list;
4488   int ascii_compatible;
4489   int c;
4490   int preferred_charset_id = -1;
4491
4492   CODING_GET_INFO (coding, attrs, charset_list);
4493   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
4494   if (VECTORP (eol_type))
4495     eol_type = Qunix;
4496
4497   setup_iso_safe_charsets (attrs);
4498   /* Charset list may have been changed.  */
4499   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4500   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4501
4502   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4503
4504   while (charbuf < charbuf_end)
4505     {
4506       ASSURE_DESTINATION (safe_room);
4507
4508       if (bol_designation)
4509         {
4510           unsigned char *dst_prev = dst;
4511
4512           /* We have to produce designation sequences if any now.  */
4513           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4514           bol_designation = 0;
4515           /* We are sure that designation sequences are all ASCII bytes.  */
4516           produced_chars += dst - dst_prev;
4517         }
4518
4519       c = *charbuf++;
4520
4521       if (c < 0)
4522         {
4523           /* Handle an annotation.  */
4524           switch (*charbuf)
4525             {
4526             case CODING_ANNOTATE_COMPOSITION_MASK:
4527               /* Not yet implemented.  */
4528               break;
4529             case CODING_ANNOTATE_CHARSET_MASK:
4530               preferred_charset_id = charbuf[2];
4531               if (preferred_charset_id >= 0
4532                   && NILP (Fmemq (make_number (preferred_charset_id),
4533                                   charset_list)))
4534                 preferred_charset_id = -1;
4535               break;
4536             default:
4537               abort ();
4538             }
4539           charbuf += -c - 1;
4540           continue;
4541         }
4542
4543       /* Now encode the character C.  */
4544       if (c < 0x20 || c == 0x7F)
4545         {
4546           if (c == '\n'
4547               || (c == '\r' && EQ (eol_type, Qmac)))
4548             {
4549               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4550                 ENCODE_RESET_PLANE_AND_REGISTER ();
4551               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4552                 {
4553                   int i;
4554
4555                   for (i = 0; i < 4; i++)
4556                     CODING_ISO_DESIGNATION (coding, i)
4557                       = CODING_ISO_INITIAL (coding, i);
4558                 }
4559               bol_designation
4560                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4561             }
4562           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4563             ENCODE_RESET_PLANE_AND_REGISTER ();
4564           EMIT_ONE_ASCII_BYTE (c);
4565         }
4566       else if (ASCII_CHAR_P (c))
4567         {
4568           if (ascii_compatible)
4569             EMIT_ONE_ASCII_BYTE (c);
4570           else
4571             {
4572               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4573               ENCODE_ISO_CHARACTER (charset, c);
4574             }
4575         }
4576       else if (CHAR_BYTE8_P (c))
4577         {
4578           c = CHAR_TO_BYTE8 (c);
4579           EMIT_ONE_BYTE (c);
4580         }
4581       else
4582         {
4583           struct charset *charset;
4584
4585           if (preferred_charset_id >= 0)
4586             {
4587               charset = CHARSET_FROM_ID (preferred_charset_id);
4588               if (! CHAR_CHARSET_P (c, charset))
4589                 charset = char_charset (c, charset_list, NULL);
4590             }
4591           else
4592             charset = char_charset (c, charset_list, NULL);
4593           if (!charset)
4594             {
4595               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4596                 {
4597                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4598                   charset = CHARSET_FROM_ID (charset_ascii);
4599                 }
4600               else
4601                 {
4602                   c = coding->default_char;
4603                   charset = char_charset (c, charset_list, NULL);
4604                 }
4605             }
4606           ENCODE_ISO_CHARACTER (charset, c);
4607         }
4608     }
4609
4610   if (coding->mode & CODING_MODE_LAST_BLOCK
4611       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4612     {
4613       ASSURE_DESTINATION (safe_room);
4614       ENCODE_RESET_PLANE_AND_REGISTER ();
4615     }
4616   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4617   CODING_ISO_BOL (coding) = bol_designation;
4618   coding->produced_char += produced_chars;
4619   coding->produced = dst - coding->destination;
4620   return 0;
4621 }
4622
4623 \f
4624 /*** 8,9. SJIS and BIG5 handlers ***/
4625
4626 /* Although SJIS and BIG5 are not ISO's coding system, they are used
4627    quite widely.  So, for the moment, Emacs supports them in the bare
4628    C code.  But, in the future, they may be supported only by CCL.  */
4629
4630 /* SJIS is a coding system encoding three character sets: ASCII, right
4631    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4632    as is.  A character of charset katakana-jisx0201 is encoded by
4633    "position-code + 0x80".  A character of charset japanese-jisx0208
4634    is encoded in 2-byte but two position-codes are divided and shifted
4635    so that it fit in the range below.
4636
4637    --- CODE RANGE of SJIS ---
4638    (character set)      (range)
4639    ASCII                0x00 .. 0x7F
4640    KATAKANA-JISX0201    0xA0 .. 0xDF
4641    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4642             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4643    -------------------------------
4644
4645 */
4646
4647 /* BIG5 is a coding system encoding two character sets: ASCII and
4648    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4649    character set and is encoded in two-byte.
4650
4651    --- CODE RANGE of BIG5 ---
4652    (character set)      (range)
4653    ASCII                0x00 .. 0x7F
4654    Big5 (1st byte)      0xA1 .. 0xFE
4655         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4656    --------------------------
4657
4658   */
4659
4660 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4661    Check if a text is encoded in SJIS.  If it is, return
4662    CATEGORY_MASK_SJIS, else return 0.  */
4663
4664 static int
4665 detect_coding_sjis (coding, detect_info)
4666      struct coding_system *coding;
4667      struct coding_detection_info *detect_info;
4668 {
4669   const unsigned char *src = coding->source, *src_base;
4670   const unsigned char *src_end = coding->source + coding->src_bytes;
4671   int multibytep = coding->src_multibyte;
4672   int consumed_chars = 0;
4673   int found = 0;
4674   int c;
4675   Lisp_Object attrs, charset_list;
4676   int max_first_byte_of_2_byte_code;
4677
4678   CODING_GET_INFO (coding, attrs, charset_list);
4679   max_first_byte_of_2_byte_code
4680     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4681
4682   detect_info->checked |= CATEGORY_MASK_SJIS;
4683   /* A coding system of this category is always ASCII compatible.  */
4684   src += coding->head_ascii;
4685
4686   while (1)
4687     {
4688       src_base = src;
4689       ONE_MORE_BYTE (c);
4690       if (c < 0x80)
4691         continue;
4692       if ((c >= 0x81 && c <= 0x9F)
4693           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4694         {
4695           ONE_MORE_BYTE (c);
4696           if (c < 0x40 || c == 0x7F || c > 0xFC)
4697             break;
4698           found = CATEGORY_MASK_SJIS;
4699         }
4700       else if (c >= 0xA0 && c < 0xE0)
4701         found = CATEGORY_MASK_SJIS;
4702       else
4703         break;
4704     }
4705   detect_info->rejected |= CATEGORY_MASK_SJIS;
4706   return 0;
4707
4708  no_more_source:
4709   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4710     {
4711       detect_info->rejected |= CATEGORY_MASK_SJIS;
4712       return 0;
4713     }
4714   detect_info->found |= found;
4715   return 1;
4716 }
4717
4718 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4719    Check if a text is encoded in BIG5.  If it is, return
4720    CATEGORY_MASK_BIG5, else return 0.  */
4721
4722 static int
4723 detect_coding_big5 (coding, detect_info)
4724      struct coding_system *coding;
4725      struct coding_detection_info *detect_info;
4726 {
4727   const unsigned char *src = coding->source, *src_base;
4728   const unsigned char *src_end = coding->source + coding->src_bytes;
4729   int multibytep = coding->src_multibyte;
4730   int consumed_chars = 0;
4731   int found = 0;
4732   int c;
4733
4734   detect_info->checked |= CATEGORY_MASK_BIG5;
4735   /* A coding system of this category is always ASCII compatible.  */
4736   src += coding->head_ascii;
4737
4738   while (1)
4739     {
4740       src_base = src;
4741       ONE_MORE_BYTE (c);
4742       if (c < 0x80)
4743         continue;
4744       if (c >= 0xA1)
4745         {
4746           ONE_MORE_BYTE (c);
4747           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4748             return 0;
4749           found = CATEGORY_MASK_BIG5;
4750         }
4751       else
4752         break;
4753     }
4754   detect_info->rejected |= CATEGORY_MASK_BIG5;
4755   return 0;
4756
4757  no_more_source:
4758   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4759     {
4760       detect_info->rejected |= CATEGORY_MASK_BIG5;
4761       return 0;
4762     }
4763   detect_info->found |= found;
4764   return 1;
4765 }
4766
4767 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4768    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4769
4770 static void
4771 decode_coding_sjis (coding)
4772      struct coding_system *coding;
4773 {
4774   const unsigned char *src = coding->source + coding->consumed;
4775   const unsigned char *src_end = coding->source + coding->src_bytes;
4776   const unsigned char *src_base;
4777   int *charbuf = coding->charbuf + coding->charbuf_used;
4778   /* We may produce one charset annocation in one loop and one more at
4779      the end.  */
4780   int *charbuf_end
4781     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4782   int consumed_chars = 0, consumed_chars_base;
4783   int multibytep = coding->src_multibyte;
4784   struct charset *charset_roman, *charset_kanji, *charset_kana;
4785   struct charset *charset_kanji2;
4786   Lisp_Object attrs, charset_list, val;
4787   int char_offset = coding->produced_char;
4788   int last_offset = char_offset;
4789   int last_id = charset_ascii;
4790   int eol_crlf =
4791     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4792   int byte_after_cr = -1;
4793
4794   CODING_GET_INFO (coding, attrs, charset_list);
4795
4796   val = charset_list;
4797   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4798   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4799   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4800   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4801
4802   while (1)
4803     {
4804       int c, c1;
4805       struct charset *charset;
4806
4807       src_base = src;
4808       consumed_chars_base = consumed_chars;
4809
4810       if (charbuf >= charbuf_end)
4811         {
4812           if (byte_after_cr >= 0)
4813             src_base--;
4814           break;
4815         }
4816
4817       if (byte_after_cr >= 0)
4818         c = byte_after_cr, byte_after_cr = -1;
4819       else
4820         ONE_MORE_BYTE (c);
4821       if (c < 0)
4822         goto invalid_code;
4823       if (c < 0x80)
4824         {
4825           if (eol_crlf && c == '\r')
4826             ONE_MORE_BYTE (byte_after_cr);
4827           charset = charset_roman;
4828         }
4829       else if (c == 0x80 || c == 0xA0)
4830         goto invalid_code;
4831       else if (c >= 0xA1 && c <= 0xDF)
4832         {
4833           /* SJIS -> JISX0201-Kana */
4834           c &= 0x7F;
4835           charset = charset_kana;
4836         }
4837       else if (c <= 0xEF)
4838         {
4839           /* SJIS -> JISX0208 */
4840           ONE_MORE_BYTE (c1);
4841           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4842             goto invalid_code;
4843           c = (c << 8) | c1;
4844           SJIS_TO_JIS (c);
4845           charset = charset_kanji;
4846         }
4847       else if (c <= 0xFC && charset_kanji2)
4848         {
4849           /* SJIS -> JISX0213-2 */
4850           ONE_MORE_BYTE (c1);
4851           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4852             goto invalid_code;
4853           c = (c << 8) | c1;
4854           SJIS_TO_JIS2 (c);
4855           charset = charset_kanji2;
4856         }
4857       else
4858         goto invalid_code;
4859       if (charset->id != charset_ascii
4860           && last_id != charset->id)
4861         {
4862           if (last_id != charset_ascii)
4863             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4864           last_id = charset->id;
4865           last_offset = char_offset;
4866         }
4867       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4868       *charbuf++ = c;
4869       char_offset++;
4870       continue;
4871
4872     invalid_code:
4873       src = src_base;
4874       consumed_chars = consumed_chars_base;
4875       ONE_MORE_BYTE (c);
4876       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4877       char_offset++;
4878       coding->errors++;
4879     }
4880
4881  no_more_source:
4882   if (last_id != charset_ascii)
4883     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4884   coding->consumed_char += consumed_chars_base;
4885   coding->consumed = src_base - coding->source;
4886   coding->charbuf_used = charbuf - coding->charbuf;
4887 }
4888
4889 static void
4890 decode_coding_big5 (coding)
4891      struct coding_system *coding;
4892 {
4893   const unsigned char *src = coding->source + coding->consumed;
4894   const unsigned char *src_end = coding->source + coding->src_bytes;
4895   const unsigned char *src_base;
4896   int *charbuf = coding->charbuf + coding->charbuf_used;
4897   /* We may produce one charset annocation in one loop and one more at
4898      the end.  */
4899   int *charbuf_end
4900     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4901   int consumed_chars = 0, consumed_chars_base;
4902   int multibytep = coding->src_multibyte;
4903   struct charset *charset_roman, *charset_big5;
4904   Lisp_Object attrs, charset_list, val;
4905   int char_offset = coding->produced_char;
4906   int last_offset = char_offset;
4907   int last_id = charset_ascii;
4908   int eol_crlf =
4909     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4910   int byte_after_cr = -1;
4911
4912   CODING_GET_INFO (coding, attrs, charset_list);
4913   val = charset_list;
4914   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4915   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4916
4917   while (1)
4918     {
4919       int c, c1;
4920       struct charset *charset;
4921
4922       src_base = src;
4923       consumed_chars_base = consumed_chars;
4924
4925       if (charbuf >= charbuf_end)
4926         {
4927           if (byte_after_cr >= 0)
4928             src_base--;
4929           break;
4930         }
4931
4932       if (byte_after_cr >= 0)
4933         c = byte_after_cr, byte_after_cr = -1;
4934       else
4935         ONE_MORE_BYTE (c);
4936
4937       if (c < 0)
4938         goto invalid_code;
4939       if (c < 0x80)
4940         {
4941           if (eol_crlf && c == '\r')
4942             ONE_MORE_BYTE (byte_after_cr);
4943           charset = charset_roman;
4944         }
4945       else
4946         {
4947           /* BIG5 -> Big5 */
4948           if (c < 0xA1 || c > 0xFE)
4949             goto invalid_code;
4950           ONE_MORE_BYTE (c1);
4951           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4952             goto invalid_code;
4953           c = c << 8 | c1;
4954           charset = charset_big5;
4955         }
4956       if (charset->id != charset_ascii
4957           && last_id != charset->id)
4958         {
4959           if (last_id != charset_ascii)
4960             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4961           last_id = charset->id;
4962           last_offset = char_offset;
4963         }
4964       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4965       *charbuf++ = c;
4966       char_offset++;
4967       continue;
4968
4969     invalid_code:
4970       src = src_base;
4971       consumed_chars = consumed_chars_base;
4972       ONE_MORE_BYTE (c);
4973       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4974       char_offset++;
4975       coding->errors++;
4976     }
4977
4978  no_more_source:
4979   if (last_id != charset_ascii)
4980     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4981   coding->consumed_char += consumed_chars_base;
4982   coding->consumed = src_base - coding->source;
4983   coding->charbuf_used = charbuf - coding->charbuf;
4984 }
4985
4986 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4987    This function can encode charsets `ascii', `katakana-jisx0201',
4988    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4989    are sure that all these charsets are registered as official charset
4990    (i.e. do not have extended leading-codes).  Characters of other
4991    charsets are produced without any encoding.  If SJIS_P is 1, encode
4992    SJIS text, else encode BIG5 text.  */
4993
4994 static int
4995 encode_coding_sjis (coding)
4996      struct coding_system *coding;
4997 {
4998   int multibytep = coding->dst_multibyte;
4999   int *charbuf = coding->charbuf;
5000   int *charbuf_end = charbuf + coding->charbuf_used;
5001   unsigned char *dst = coding->destination + coding->produced;
5002   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5003   int safe_room = 4;
5004   int produced_chars = 0;
5005   Lisp_Object attrs, charset_list, val;
5006   int ascii_compatible;
5007   struct charset *charset_roman, *charset_kanji, *charset_kana;
5008   struct charset *charset_kanji2;
5009   int c;
5010
5011   CODING_GET_INFO (coding, attrs, charset_list);
5012   val = charset_list;
5013   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5014   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5015   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5016   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
5017
5018   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5019
5020   while (charbuf < charbuf_end)
5021     {
5022       ASSURE_DESTINATION (safe_room);
5023       c = *charbuf++;
5024       /* Now encode the character C.  */
5025       if (ASCII_CHAR_P (c) && ascii_compatible)
5026         EMIT_ONE_ASCII_BYTE (c);
5027       else if (CHAR_BYTE8_P (c))
5028         {
5029           c = CHAR_TO_BYTE8 (c);
5030           EMIT_ONE_BYTE (c);
5031         }
5032       else
5033         {
5034           unsigned code;
5035           struct charset *charset = char_charset (c, charset_list, &code);
5036
5037           if (!charset)
5038             {
5039               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5040                 {
5041                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5042                   charset = CHARSET_FROM_ID (charset_ascii);
5043                 }
5044               else
5045                 {
5046                   c = coding->default_char;
5047                   charset = char_charset (c, charset_list, &code);
5048                 }
5049             }
5050           if (code == CHARSET_INVALID_CODE (charset))
5051             abort ();
5052           if (charset == charset_kanji)
5053             {
5054               int c1, c2;
5055               JIS_TO_SJIS (code);
5056               c1 = code >> 8, c2 = code & 0xFF;
5057               EMIT_TWO_BYTES (c1, c2);
5058             }
5059           else if (charset == charset_kana)
5060             EMIT_ONE_BYTE (code | 0x80);
5061           else if (charset_kanji2 && charset == charset_kanji2)
5062             {
5063               int c1, c2;
5064
5065               c1 = code >> 8;
5066               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5067                   || c1 == 0x28
5068                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5069                 {
5070                   JIS_TO_SJIS2 (code);
5071                   c1 = code >> 8, c2 = code & 0xFF;
5072                   EMIT_TWO_BYTES (c1, c2);
5073                 }
5074               else
5075                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5076             }
5077           else
5078             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5079         }
5080     }
5081   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5082   coding->produced_char += produced_chars;
5083   coding->produced = dst - coding->destination;
5084   return 0;
5085 }
5086
5087 static int
5088 encode_coding_big5 (coding)
5089      struct coding_system *coding;
5090 {
5091   int multibytep = coding->dst_multibyte;
5092   int *charbuf = coding->charbuf;
5093   int *charbuf_end = charbuf + coding->charbuf_used;
5094   unsigned char *dst = coding->destination + coding->produced;
5095   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5096   int safe_room = 4;
5097   int produced_chars = 0;
5098   Lisp_Object attrs, charset_list, val;
5099   int ascii_compatible;
5100   struct charset *charset_roman, *charset_big5;
5101   int c;
5102
5103   CODING_GET_INFO (coding, attrs, charset_list);
5104   val = charset_list;
5105   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5106   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5107   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5108
5109   while (charbuf < charbuf_end)
5110     {
5111       ASSURE_DESTINATION (safe_room);
5112       c = *charbuf++;
5113       /* Now encode the character C.  */
5114       if (ASCII_CHAR_P (c) && ascii_compatible)
5115         EMIT_ONE_ASCII_BYTE (c);
5116       else if (CHAR_BYTE8_P (c))
5117         {
5118           c = CHAR_TO_BYTE8 (c);
5119           EMIT_ONE_BYTE (c);
5120         }
5121       else
5122         {
5123           unsigned code;
5124           struct charset *charset = char_charset (c, charset_list, &code);
5125
5126           if (! charset)
5127             {
5128               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5129                 {
5130                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5131                   charset = CHARSET_FROM_ID (charset_ascii);
5132                 }
5133               else
5134                 {
5135                   c = coding->default_char;
5136                   charset = char_charset (c, charset_list, &code);
5137                 }
5138             }
5139           if (code == CHARSET_INVALID_CODE (charset))
5140             abort ();
5141           if (charset == charset_big5)
5142             {
5143               int c1, c2;
5144
5145               c1 = code >> 8, c2 = code & 0xFF;
5146               EMIT_TWO_BYTES (c1, c2);
5147             }
5148           else
5149             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5150         }
5151     }
5152   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5153   coding->produced_char += produced_chars;
5154   coding->produced = dst - coding->destination;
5155   return 0;
5156 }
5157
5158 \f
5159 /*** 10. CCL handlers ***/
5160
5161 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5162    Check if a text is encoded in a coding system of which
5163    encoder/decoder are written in CCL program.  If it is, return
5164    CATEGORY_MASK_CCL, else return 0.  */
5165
5166 static int
5167 detect_coding_ccl (coding, detect_info)
5168      struct coding_system *coding;
5169      struct coding_detection_info *detect_info;
5170 {
5171   const unsigned char *src = coding->source, *src_base;
5172   const unsigned char *src_end = coding->source + coding->src_bytes;
5173   int multibytep = coding->src_multibyte;
5174   int consumed_chars = 0;
5175   int found = 0;
5176   unsigned char *valids;
5177   int head_ascii = coding->head_ascii;
5178   Lisp_Object attrs;
5179
5180   detect_info->checked |= CATEGORY_MASK_CCL;
5181
5182   coding = &coding_categories[coding_category_ccl];
5183   valids = CODING_CCL_VALIDS (coding);
5184   attrs = CODING_ID_ATTRS (coding->id);
5185   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5186     src += head_ascii;
5187
5188   while (1)
5189     {
5190       int c;
5191
5192       src_base = src;
5193       ONE_MORE_BYTE (c);
5194       if (c < 0 || ! valids[c])
5195         break;
5196       if ((valids[c] > 1))
5197         found = CATEGORY_MASK_CCL;
5198     }
5199   detect_info->rejected |= CATEGORY_MASK_CCL;
5200   return 0;
5201
5202  no_more_source:
5203   detect_info->found |= found;
5204   return 1;
5205 }
5206
5207 static void
5208 decode_coding_ccl (coding)
5209      struct coding_system *coding;
5210 {
5211   const unsigned char *src = coding->source + coding->consumed;
5212   const unsigned char *src_end = coding->source + coding->src_bytes;
5213   int *charbuf = coding->charbuf + coding->charbuf_used;
5214   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5215   int consumed_chars = 0;
5216   int multibytep = coding->src_multibyte;
5217   struct ccl_program ccl;
5218   int source_charbuf[1024];
5219   int source_byteidx[1024];
5220   Lisp_Object attrs, charset_list;
5221
5222   CODING_GET_INFO (coding, attrs, charset_list);
5223   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
5224
5225   while (src < src_end)
5226     {
5227       const unsigned char *p = src;
5228       int *source, *source_end;
5229       int i = 0;
5230
5231       if (multibytep)
5232         while (i < 1024 && p < src_end)
5233           {
5234             source_byteidx[i] = p - src;
5235             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5236           }
5237       else
5238         while (i < 1024 && p < src_end)
5239           source_charbuf[i++] = *p++;
5240
5241       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5242         ccl.last_block = 1;
5243
5244       source = source_charbuf;
5245       source_end = source + i;
5246       while (source < source_end)
5247         {
5248           ccl_driver (&ccl, source, charbuf,
5249                       source_end - source, charbuf_end - charbuf,
5250                       charset_list);
5251           source += ccl.consumed;
5252           charbuf += ccl.produced;
5253           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
5254             break;
5255         }
5256       if (source < source_end)
5257         src += source_byteidx[source - source_charbuf];
5258       else
5259         src = p;
5260       consumed_chars += source - source_charbuf;
5261
5262       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
5263           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
5264         break;
5265     }
5266
5267   switch (ccl.status)
5268     {
5269     case CCL_STAT_SUSPEND_BY_SRC:
5270       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5271       break;
5272     case CCL_STAT_SUSPEND_BY_DST:
5273       break;
5274     case CCL_STAT_QUIT:
5275     case CCL_STAT_INVALID_CMD:
5276       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5277       break;
5278     default:
5279       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5280       break;
5281     }
5282   coding->consumed_char += consumed_chars;
5283   coding->consumed = src - coding->source;
5284   coding->charbuf_used = charbuf - coding->charbuf;
5285 }
5286
5287 static int
5288 encode_coding_ccl (coding)
5289      struct coding_system *coding;
5290 {
5291   struct ccl_program ccl;
5292   int multibytep = coding->dst_multibyte;
5293   int *charbuf = coding->charbuf;
5294   int *charbuf_end = charbuf + coding->charbuf_used;
5295   unsigned char *dst = coding->destination + coding->produced;
5296   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5297   int destination_charbuf[1024];
5298   int i, produced_chars = 0;
5299   Lisp_Object attrs, charset_list;
5300
5301   CODING_GET_INFO (coding, attrs, charset_list);
5302   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
5303
5304   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
5305   ccl.dst_multibyte = coding->dst_multibyte;
5306
5307   while (charbuf < charbuf_end)
5308     {
5309       ccl_driver (&ccl, charbuf, destination_charbuf,
5310                   charbuf_end - charbuf, 1024, charset_list);
5311       if (multibytep)
5312         {
5313           ASSURE_DESTINATION (ccl.produced * 2);
5314           for (i = 0; i < ccl.produced; i++)
5315             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5316         }
5317       else
5318         {
5319           ASSURE_DESTINATION (ccl.produced);
5320           for (i = 0; i < ccl.produced; i++)
5321             *dst++ = destination_charbuf[i] & 0xFF;
5322           produced_chars += ccl.produced;
5323         }
5324       charbuf += ccl.consumed;
5325       if (ccl.status == CCL_STAT_QUIT
5326           || ccl.status == CCL_STAT_INVALID_CMD)
5327         break;
5328     }
5329
5330   switch (ccl.status)
5331     {
5332     case CCL_STAT_SUSPEND_BY_SRC:
5333       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5334       break;
5335     case CCL_STAT_SUSPEND_BY_DST:
5336       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5337       break;
5338     case CCL_STAT_QUIT:
5339     case CCL_STAT_INVALID_CMD:
5340       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5341       break;
5342     default:
5343       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5344       break;
5345     }
5346
5347   coding->produced_char += produced_chars;
5348   coding->produced = dst - coding->destination;
5349   return 0;
5350 }
5351
5352
5353 \f
5354 /*** 10, 11. no-conversion handlers ***/
5355
5356 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
5357
5358 static void
5359 decode_coding_raw_text (coding)
5360      struct coding_system *coding;
5361 {
5362   int eol_crlf =
5363     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5364
5365   coding->chars_at_source = 1;
5366   coding->consumed_char = coding->src_chars;
5367   coding->consumed = coding->src_bytes;
5368   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
5369     {
5370       coding->consumed_char--;
5371       coding->consumed--;
5372       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5373     }
5374   else
5375     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5376 }
5377
5378 static int
5379 encode_coding_raw_text (coding)
5380      struct coding_system *coding;
5381 {
5382   int multibytep = coding->dst_multibyte;
5383   int *charbuf = coding->charbuf;
5384   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5385   unsigned char *dst = coding->destination + coding->produced;
5386   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5387   int produced_chars = 0;
5388   int c;
5389
5390   if (multibytep)
5391     {
5392       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5393
5394       if (coding->src_multibyte)
5395         while (charbuf < charbuf_end)
5396           {
5397             ASSURE_DESTINATION (safe_room);
5398             c = *charbuf++;
5399             if (ASCII_CHAR_P (c))
5400               EMIT_ONE_ASCII_BYTE (c);
5401             else if (CHAR_BYTE8_P (c))
5402               {
5403                 c = CHAR_TO_BYTE8 (c);
5404                 EMIT_ONE_BYTE (c);
5405               }
5406             else
5407               {
5408                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5409
5410                 CHAR_STRING_ADVANCE (c, p1);
5411                 while (p0 < p1)
5412                   {
5413                     EMIT_ONE_BYTE (*p0);
5414                     p0++;
5415                   }
5416               }
5417           }
5418       else
5419         while (charbuf < charbuf_end)
5420           {
5421             ASSURE_DESTINATION (safe_room);
5422             c = *charbuf++;
5423             EMIT_ONE_BYTE (c);
5424           }
5425     }
5426   else
5427     {
5428       if (coding->src_multibyte)
5429         {
5430           int safe_room = MAX_MULTIBYTE_LENGTH;
5431
5432           while (charbuf < charbuf_end)
5433             {
5434               ASSURE_DESTINATION (safe_room);
5435               c = *charbuf++;
5436               if (ASCII_CHAR_P (c))
5437                 *dst++ = c;
5438               else if (CHAR_BYTE8_P (c))
5439                 *dst++ = CHAR_TO_BYTE8 (c);
5440               else
5441                 CHAR_STRING_ADVANCE (c, dst);
5442             }
5443         }
5444       else
5445         {
5446           ASSURE_DESTINATION (charbuf_end - charbuf);
5447           while (charbuf < charbuf_end && dst < dst_end)
5448             *dst++ = *charbuf++;
5449         }
5450       produced_chars = dst - (coding->destination + coding->produced);
5451     }
5452   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5453   coding->produced_char += produced_chars;
5454   coding->produced = dst - coding->destination;
5455   return 0;
5456 }
5457
5458 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5459    Check if a text is encoded in a charset-based coding system.  If it
5460    is, return 1, else return 0.  */
5461
5462 static int
5463 detect_coding_charset (coding, detect_info)
5464      struct coding_system *coding;
5465      struct coding_detection_info *detect_info;
5466 {
5467   const unsigned char *src = coding->source, *src_base;
5468   const unsigned char *src_end = coding->source + coding->src_bytes;
5469   int multibytep = coding->src_multibyte;
5470   int consumed_chars = 0;
5471   Lisp_Object attrs, valids, name;
5472   int found = 0;
5473   int head_ascii = coding->head_ascii;
5474   int check_latin_extra = 0;
5475
5476   detect_info->checked |= CATEGORY_MASK_CHARSET;
5477
5478   coding = &coding_categories[coding_category_charset];
5479   attrs = CODING_ID_ATTRS (coding->id);
5480   valids = AREF (attrs, coding_attr_charset_valids);
5481   name = CODING_ID_NAME (coding->id);
5482   if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5483                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5484       || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5485                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5486     check_latin_extra = 1;
5487
5488   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5489     src += head_ascii;
5490
5491   while (1)
5492     {
5493       int c;
5494       Lisp_Object val;
5495       struct charset *charset;
5496       int dim, idx;
5497
5498       src_base = src;
5499       ONE_MORE_BYTE (c);
5500       if (c < 0)
5501         continue;
5502       val = AREF (valids, c);
5503       if (NILP (val))
5504         break;
5505       if (c >= 0x80)
5506         {
5507           if (c < 0xA0
5508               && check_latin_extra
5509               && (!VECTORP (Vlatin_extra_code_table)
5510                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5511             break;
5512           found = CATEGORY_MASK_CHARSET;
5513         }
5514       if (INTEGERP (val))
5515         {
5516           charset = CHARSET_FROM_ID (XFASTINT (val));
5517           dim = CHARSET_DIMENSION (charset);
5518           for (idx = 1; idx < dim; idx++)
5519             {
5520               if (src == src_end)
5521                 goto too_short;
5522               ONE_MORE_BYTE (c);
5523               if (c < charset->code_space[(dim - 1 - idx) * 2]
5524                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5525                 break;
5526             }
5527           if (idx < dim)
5528             break;
5529         }
5530       else
5531         {
5532           idx = 1;
5533           for (; CONSP (val); val = XCDR (val))
5534             {
5535               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5536               dim = CHARSET_DIMENSION (charset);
5537               while (idx < dim)
5538                 {
5539                   if (src == src_end)
5540                     goto too_short;
5541                   ONE_MORE_BYTE (c);
5542                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5543                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5544                     break;
5545                   idx++;
5546                 }
5547               if (idx == dim)
5548                 {
5549                   val = Qnil;
5550                   break;
5551                 }
5552             }
5553           if (CONSP (val))
5554             break;
5555         }
5556     }
5557  too_short:
5558   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5559   return 0;
5560
5561  no_more_source:
5562   detect_info->found |= found;
5563   return 1;
5564 }
5565
5566 static void
5567 decode_coding_charset (coding)
5568      struct coding_system *coding;
5569 {
5570   const unsigned char *src = coding->source + coding->consumed;
5571   const unsigned char *src_end = coding->source + coding->src_bytes;
5572   const unsigned char *src_base;
5573   int *charbuf = coding->charbuf + coding->charbuf_used;
5574   /* We may produce one charset annocation in one loop and one more at
5575      the end.  */
5576   int *charbuf_end
5577     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5578   int consumed_chars = 0, consumed_chars_base;
5579   int multibytep = coding->src_multibyte;
5580   Lisp_Object attrs, charset_list, valids;
5581   int char_offset = coding->produced_char;
5582   int last_offset = char_offset;
5583   int last_id = charset_ascii;
5584   int eol_crlf =
5585     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5586   int byte_after_cr = -1;
5587
5588   CODING_GET_INFO (coding, attrs, charset_list);
5589   valids = AREF (attrs, coding_attr_charset_valids);
5590
5591   while (1)
5592     {
5593       int c;
5594       Lisp_Object val;
5595       struct charset *charset;
5596       int dim;
5597       int len = 1;
5598       unsigned code;
5599
5600       src_base = src;
5601       consumed_chars_base = consumed_chars;
5602
5603       if (charbuf >= charbuf_end)
5604         {
5605           if (byte_after_cr >= 0)
5606             src_base--;
5607           break;
5608         }
5609
5610       if (byte_after_cr >= 0)
5611         {
5612           c = byte_after_cr;
5613           byte_after_cr = -1;
5614         }
5615       else
5616         {
5617           ONE_MORE_BYTE (c);
5618           if (eol_crlf && c == '\r')
5619             ONE_MORE_BYTE (byte_after_cr);
5620         }
5621       if (c < 0)
5622         goto invalid_code;
5623       code = c;
5624
5625       val = AREF (valids, c);
5626       if (! INTEGERP (val) && ! CONSP (val))
5627         goto invalid_code;
5628       if (INTEGERP (val))
5629         {
5630           charset = CHARSET_FROM_ID (XFASTINT (val));
5631           dim = CHARSET_DIMENSION (charset);
5632           while (len < dim)
5633             {
5634               ONE_MORE_BYTE (c);
5635               code = (code << 8) | c;
5636               len++;
5637             }
5638           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5639                               charset, code, c);
5640         }
5641       else
5642         {
5643           /* VAL is a list of charset IDs.  It is assured that the
5644              list is sorted by charset dimensions (smaller one
5645              comes first).  */
5646           while (CONSP (val))
5647             {
5648               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5649               dim = CHARSET_DIMENSION (charset);
5650               while (len < dim)
5651                 {
5652                   ONE_MORE_BYTE (c);
5653                   code = (code << 8) | c;
5654                   len++;
5655                 }
5656               CODING_DECODE_CHAR (coding, src, src_base,
5657                                   src_end, charset, code, c);
5658               if (c >= 0)
5659                 break;
5660               val = XCDR (val);
5661             }
5662         }
5663       if (c < 0)
5664         goto invalid_code;
5665       if (charset->id != charset_ascii
5666           && last_id != charset->id)
5667         {
5668           if (last_id != charset_ascii)
5669             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5670           last_id = charset->id;
5671           last_offset = char_offset;
5672         }
5673
5674       *charbuf++ = c;
5675       char_offset++;
5676       continue;
5677
5678     invalid_code:
5679       src = src_base;
5680       consumed_chars = consumed_chars_base;
5681       ONE_MORE_BYTE (c);
5682       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5683       char_offset++;
5684       coding->errors++;
5685     }
5686
5687  no_more_source:
5688   if (last_id != charset_ascii)
5689     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5690   coding->consumed_char += consumed_chars_base;
5691   coding->consumed = src_base - coding->source;
5692   coding->charbuf_used = charbuf - coding->charbuf;
5693 }
5694
5695 static int
5696 encode_coding_charset (coding)
5697      struct coding_system *coding;
5698 {
5699   int multibytep = coding->dst_multibyte;
5700   int *charbuf = coding->charbuf;
5701   int *charbuf_end = charbuf + coding->charbuf_used;
5702   unsigned char *dst = coding->destination + coding->produced;
5703   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5704   int safe_room = MAX_MULTIBYTE_LENGTH;
5705   int produced_chars = 0;
5706   Lisp_Object attrs, charset_list;
5707   int ascii_compatible;
5708   int c;
5709
5710   CODING_GET_INFO (coding, attrs, charset_list);
5711   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5712
5713   while (charbuf < charbuf_end)
5714     {
5715       struct charset *charset;
5716       unsigned code;
5717
5718       ASSURE_DESTINATION (safe_room);
5719       c = *charbuf++;
5720       if (ascii_compatible && ASCII_CHAR_P (c))
5721         EMIT_ONE_ASCII_BYTE (c);
5722       else if (CHAR_BYTE8_P (c))
5723         {
5724           c = CHAR_TO_BYTE8 (c);
5725           EMIT_ONE_BYTE (c);
5726         }
5727       else
5728         {
5729           charset = char_charset (c, charset_list, &code);
5730           if (charset)
5731             {
5732               if (CHARSET_DIMENSION (charset) == 1)
5733                 EMIT_ONE_BYTE (code);
5734               else if (CHARSET_DIMENSION (charset) == 2)
5735                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5736               else if (CHARSET_DIMENSION (charset) == 3)
5737                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5738               else
5739                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5740                                  (code >> 8) & 0xFF, code & 0xFF);
5741             }
5742           else
5743             {
5744               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5745                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5746               else
5747                 c = coding->default_char;
5748               EMIT_ONE_BYTE (c);
5749             }
5750         }
5751     }
5752
5753   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5754   coding->produced_char += produced_chars;
5755   coding->produced = dst - coding->destination;
5756   return 0;
5757 }
5758
5759 \f
5760 /*** 7. C library functions ***/
5761
5762 /* Setup coding context CODING from information about CODING_SYSTEM.
5763    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5764    CODING_SYSTEM is invalid, signal an error.  */
5765
5766 void
5767 setup_coding_system (coding_system, coding)
5768      Lisp_Object coding_system;
5769      struct coding_system *coding;
5770 {
5771   Lisp_Object attrs;
5772   Lisp_Object eol_type;
5773   Lisp_Object coding_type;
5774   Lisp_Object val;
5775
5776   if (NILP (coding_system))
5777     coding_system = Qundecided;
5778
5779   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5780
5781   attrs = CODING_ID_ATTRS (coding->id);
5782   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5783
5784   coding->mode = 0;
5785   coding->head_ascii = -1;
5786   if (VECTORP (eol_type))
5787     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5788                             | CODING_REQUIRE_DETECTION_MASK);
5789   else if (! EQ (eol_type, Qunix))
5790     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5791                             | CODING_REQUIRE_ENCODING_MASK);
5792   else
5793     coding->common_flags = 0;
5794   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5795     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5796   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5797     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5798   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5799     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5800
5801   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5802   coding->max_charset_id = SCHARS (val) - 1;
5803   coding->safe_charsets = SDATA (val);
5804   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5805   coding->carryover_bytes = 0;
5806
5807   coding_type = CODING_ATTR_TYPE (attrs);
5808   if (EQ (coding_type, Qundecided))
5809     {
5810       coding->detector = NULL;
5811       coding->decoder = decode_coding_raw_text;
5812       coding->encoder = encode_coding_raw_text;
5813       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5814     }
5815   else if (EQ (coding_type, Qiso_2022))
5816     {
5817       int i;
5818       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5819
5820       /* Invoke graphic register 0 to plane 0.  */
5821       CODING_ISO_INVOCATION (coding, 0) = 0;
5822       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5823       CODING_ISO_INVOCATION (coding, 1)
5824         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5825       /* Setup the initial status of designation.  */
5826       for (i = 0; i < 4; i++)
5827         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5828       /* Not single shifting initially.  */
5829       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5830       /* Beginning of buffer should also be regarded as bol. */
5831       CODING_ISO_BOL (coding) = 1;
5832       coding->detector = detect_coding_iso_2022;
5833       coding->decoder = decode_coding_iso_2022;
5834       coding->encoder = encode_coding_iso_2022;
5835       if (flags & CODING_ISO_FLAG_SAFE)
5836         coding->mode |= CODING_MODE_SAFE_ENCODING;
5837       coding->common_flags
5838         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5839             | CODING_REQUIRE_FLUSHING_MASK);
5840       if (flags & CODING_ISO_FLAG_COMPOSITION)
5841         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5842       if (flags & CODING_ISO_FLAG_DESIGNATION)
5843         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5844       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5845         {
5846           setup_iso_safe_charsets (attrs);
5847           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5848           coding->max_charset_id = SCHARS (val) - 1;
5849           coding->safe_charsets = SDATA (val);
5850         }
5851       CODING_ISO_FLAGS (coding) = flags;
5852       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5853       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5854       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5855       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5856     }
5857   else if (EQ (coding_type, Qcharset))
5858     {
5859       coding->detector = detect_coding_charset;
5860       coding->decoder = decode_coding_charset;
5861       coding->encoder = encode_coding_charset;
5862       coding->common_flags
5863         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5864     }
5865   else if (EQ (coding_type, Qutf_8))
5866     {
5867       val = AREF (attrs, coding_attr_utf_bom);
5868       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5869                                    : EQ (val, Qt) ? utf_with_bom
5870                                    : utf_without_bom);
5871       coding->detector = detect_coding_utf_8;
5872       coding->decoder = decode_coding_utf_8;
5873       coding->encoder = encode_coding_utf_8;
5874       coding->common_flags
5875         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5876       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5877         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5878     }
5879   else if (EQ (coding_type, Qutf_16))
5880     {
5881       val = AREF (attrs, coding_attr_utf_bom);
5882       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5883                                     : EQ (val, Qt) ? utf_with_bom
5884                                     : utf_without_bom);
5885       val = AREF (attrs, coding_attr_utf_16_endian);
5886       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5887                                        : utf_16_little_endian);
5888       CODING_UTF_16_SURROGATE (coding) = 0;
5889       coding->detector = detect_coding_utf_16;
5890       coding->decoder = decode_coding_utf_16;
5891       coding->encoder = encode_coding_utf_16;
5892       coding->common_flags
5893         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5894       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5895         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5896     }
5897   else if (EQ (coding_type, Qccl))
5898     {
5899       coding->detector = detect_coding_ccl;
5900       coding->decoder = decode_coding_ccl;
5901       coding->encoder = encode_coding_ccl;
5902       coding->common_flags
5903         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5904             | CODING_REQUIRE_FLUSHING_MASK);
5905     }
5906   else if (EQ (coding_type, Qemacs_mule))
5907     {
5908       coding->detector = detect_coding_emacs_mule;
5909       coding->decoder = decode_coding_emacs_mule;
5910       coding->encoder = encode_coding_emacs_mule;
5911       coding->common_flags
5912         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5913       coding->spec.emacs_mule.full_support = 1;
5914       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5915           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5916         {
5917           Lisp_Object tail, safe_charsets;
5918           int max_charset_id = 0;
5919
5920           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5921                tail = XCDR (tail))
5922             if (max_charset_id < XFASTINT (XCAR (tail)))
5923               max_charset_id = XFASTINT (XCAR (tail));
5924           safe_charsets = make_uninit_string (max_charset_id + 1);
5925           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5926           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5927                tail = XCDR (tail))
5928             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5929           coding->max_charset_id = max_charset_id;
5930           coding->safe_charsets = SDATA (safe_charsets);
5931           coding->spec.emacs_mule.full_support = 1;
5932         }
5933       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5934       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5935     }
5936   else if (EQ (coding_type, Qshift_jis))
5937     {
5938       coding->detector = detect_coding_sjis;
5939       coding->decoder = decode_coding_sjis;
5940       coding->encoder = encode_coding_sjis;
5941       coding->common_flags
5942         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5943     }
5944   else if (EQ (coding_type, Qbig5))
5945     {
5946       coding->detector = detect_coding_big5;
5947       coding->decoder = decode_coding_big5;
5948       coding->encoder = encode_coding_big5;
5949       coding->common_flags
5950         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5951     }
5952   else                          /* EQ (coding_type, Qraw_text) */
5953     {
5954       coding->detector = NULL;
5955       coding->decoder = decode_coding_raw_text;
5956       coding->encoder = encode_coding_raw_text;
5957       if (! EQ (eol_type, Qunix))
5958         {
5959           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5960           if (! VECTORP (eol_type))
5961             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5962         }
5963
5964     }
5965
5966   return;
5967 }
5968
5969 /* Return a list of charsets supported by CODING.  */
5970
5971 Lisp_Object
5972 coding_charset_list (coding)
5973      struct coding_system *coding;
5974 {
5975   Lisp_Object attrs, charset_list;
5976
5977   CODING_GET_INFO (coding, attrs, charset_list);
5978   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5979     {
5980       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5981
5982       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5983         charset_list = Viso_2022_charset_list;
5984     }
5985   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5986     {
5987       charset_list = Vemacs_mule_charset_list;
5988     }
5989   return charset_list;
5990 }
5991
5992
5993 /* Return a list of charsets supported by CODING-SYSTEM.  */
5994
5995 Lisp_Object
5996 coding_system_charset_list (coding_system)
5997      Lisp_Object coding_system;
5998 {
5999   int id;
6000   Lisp_Object attrs, charset_list;
6001
6002   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
6003   attrs = CODING_ID_ATTRS (id);
6004
6005   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
6006     {
6007       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
6008
6009       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
6010         charset_list = Viso_2022_charset_list;
6011       else
6012         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6013     }
6014   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
6015     {
6016       charset_list = Vemacs_mule_charset_list;
6017     }
6018   else
6019     {
6020       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6021     }
6022   return charset_list;
6023 }
6024
6025
6026 /* Return raw-text or one of its subsidiaries that has the same
6027    eol_type as CODING-SYSTEM.  */
6028
6029 Lisp_Object
6030 raw_text_coding_system (coding_system)
6031      Lisp_Object coding_system;
6032 {
6033   Lisp_Object spec, attrs;
6034   Lisp_Object eol_type, raw_text_eol_type;
6035
6036   if (NILP (coding_system))
6037     return Qraw_text;
6038   spec = CODING_SYSTEM_SPEC (coding_system);
6039   attrs = AREF (spec, 0);
6040
6041   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6042     return coding_system;
6043
6044   eol_type = AREF (spec, 2);
6045   if (VECTORP (eol_type))
6046     return Qraw_text;
6047   spec = CODING_SYSTEM_SPEC (Qraw_text);
6048   raw_text_eol_type = AREF (spec, 2);
6049   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6050           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6051           : AREF (raw_text_eol_type, 2));
6052 }
6053
6054
6055 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6056    does, return one of the subsidiary that has the same eol-spec as
6057    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
6058    inherit end-of-line format from the system's setting
6059    (system_eol_type).  */
6060
6061 Lisp_Object
6062 coding_inherit_eol_type (coding_system, parent)
6063      Lisp_Object coding_system, parent;
6064 {
6065   Lisp_Object spec, eol_type;
6066
6067   if (NILP (coding_system))
6068     coding_system = Qraw_text;
6069   spec = CODING_SYSTEM_SPEC (coding_system);
6070   eol_type = AREF (spec, 2);
6071   if (VECTORP (eol_type))
6072     {
6073       Lisp_Object parent_eol_type;
6074
6075       if (! NILP (parent))
6076         {
6077           Lisp_Object parent_spec;
6078
6079           parent_spec = CODING_SYSTEM_SPEC (parent);
6080           parent_eol_type = AREF (parent_spec, 2);
6081         }
6082       else
6083         parent_eol_type = system_eol_type;
6084       if (EQ (parent_eol_type, Qunix))
6085         coding_system = AREF (eol_type, 0);
6086       else if (EQ (parent_eol_type, Qdos))
6087         coding_system = AREF (eol_type, 1);
6088       else if (EQ (parent_eol_type, Qmac))
6089         coding_system = AREF (eol_type, 2);
6090     }
6091   return coding_system;
6092 }
6093
6094 /* Emacs has a mechanism to automatically detect a coding system if it
6095    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
6096    it's impossible to distinguish some coding systems accurately
6097    because they use the same range of codes.  So, at first, coding
6098    systems are categorized into 7, those are:
6099
6100    o coding-category-emacs-mule
6101
6102         The category for a coding system which has the same code range
6103         as Emacs' internal format.  Assigned the coding-system (Lisp
6104         symbol) `emacs-mule' by default.
6105
6106    o coding-category-sjis
6107
6108         The category for a coding system which has the same code range
6109         as SJIS.  Assigned the coding-system (Lisp
6110         symbol) `japanese-shift-jis' by default.
6111
6112    o coding-category-iso-7
6113
6114         The category for a coding system which has the same code range
6115         as ISO2022 of 7-bit environment.  This doesn't use any locking
6116         shift and single shift functions.  This can encode/decode all
6117         charsets.  Assigned the coding-system (Lisp symbol)
6118         `iso-2022-7bit' by default.
6119
6120    o coding-category-iso-7-tight
6121
6122         Same as coding-category-iso-7 except that this can
6123         encode/decode only the specified charsets.
6124
6125    o coding-category-iso-8-1
6126
6127         The category for a coding system which has the same code range
6128         as ISO2022 of 8-bit environment and graphic plane 1 used only
6129         for DIMENSION1 charset.  This doesn't use any locking shift
6130         and single shift functions.  Assigned the coding-system (Lisp
6131         symbol) `iso-latin-1' by default.
6132
6133    o coding-category-iso-8-2
6134
6135         The category for a coding system which has the same code range
6136         as ISO2022 of 8-bit environment and graphic plane 1 used only
6137         for DIMENSION2 charset.  This doesn't use any locking shift
6138         and single shift functions.  Assigned the coding-system (Lisp
6139         symbol) `japanese-iso-8bit' by default.
6140
6141    o coding-category-iso-7-else
6142
6143         The category for a coding system which has the same code range
6144         as ISO2022 of 7-bit environemnt but uses locking shift or
6145         single shift functions.  Assigned the coding-system (Lisp
6146         symbol) `iso-2022-7bit-lock' by default.
6147
6148    o coding-category-iso-8-else
6149
6150         The category for a coding system which has the same code range
6151         as ISO2022 of 8-bit environemnt but uses locking shift or
6152         single shift functions.  Assigned the coding-system (Lisp
6153         symbol) `iso-2022-8bit-ss2' by default.
6154
6155    o coding-category-big5
6156
6157         The category for a coding system which has the same code range
6158         as BIG5.  Assigned the coding-system (Lisp symbol)
6159         `cn-big5' by default.
6160
6161    o coding-category-utf-8
6162
6163         The category for a coding system which has the same code range
6164         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6165         symbol) `utf-8' by default.
6166
6167    o coding-category-utf-16-be
6168
6169         The category for a coding system in which a text has an
6170         Unicode signature (cf. Unicode Standard) in the order of BIG
6171         endian at the head.  Assigned the coding-system (Lisp symbol)
6172         `utf-16-be' by default.
6173
6174    o coding-category-utf-16-le
6175
6176         The category for a coding system in which a text has an
6177         Unicode signature (cf. Unicode Standard) in the order of
6178         LITTLE endian at the head.  Assigned the coding-system (Lisp
6179         symbol) `utf-16-le' by default.
6180
6181    o coding-category-ccl
6182
6183         The category for a coding system of which encoder/decoder is
6184         written in CCL programs.  The default value is nil, i.e., no
6185         coding system is assigned.
6186
6187    o coding-category-binary
6188
6189         The category for a coding system not categorized in any of the
6190         above.  Assigned the coding-system (Lisp symbol)
6191         `no-conversion' by default.
6192
6193    Each of them is a Lisp symbol and the value is an actual
6194    `coding-system's (this is also a Lisp symbol) assigned by a user.
6195    What Emacs does actually is to detect a category of coding system.
6196    Then, it uses a `coding-system' assigned to it.  If Emacs can't
6197    decide only one possible category, it selects a category of the
6198    highest priority.  Priorities of categories are also specified by a
6199    user in a Lisp variable `coding-category-list'.
6200
6201 */
6202
6203 #define EOL_SEEN_NONE   0
6204 #define EOL_SEEN_LF     1
6205 #define EOL_SEEN_CR     2
6206 #define EOL_SEEN_CRLF   4
6207
6208 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6209    SOURCE is encoded.  If CATEGORY is one of
6210    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6211    two-byte, else they are encoded by one-byte.
6212
6213    Return one of EOL_SEEN_XXX.  */
6214
6215 #define MAX_EOL_CHECK_COUNT 3
6216
6217 static int
6218 detect_eol (source, src_bytes, category)
6219      const unsigned char *source;
6220      EMACS_INT src_bytes;
6221      enum coding_category category;
6222 {
6223   const unsigned char *src = source, *src_end = src + src_bytes;
6224   unsigned char c;
6225   int total  = 0;
6226   int eol_seen = EOL_SEEN_NONE;
6227
6228   if ((1 << category) & CATEGORY_MASK_UTF_16)
6229     {
6230       int msb, lsb;
6231
6232       msb = category == (coding_category_utf_16_le
6233                          | coding_category_utf_16_le_nosig);
6234       lsb = 1 - msb;
6235
6236       while (src + 1 < src_end)
6237         {
6238           c = src[lsb];
6239           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6240             {
6241               int this_eol;
6242
6243               if (c == '\n')
6244                 this_eol = EOL_SEEN_LF;
6245               else if (src + 3 >= src_end
6246                        || src[msb + 2] != 0
6247                        || src[lsb + 2] != '\n')
6248                 this_eol = EOL_SEEN_CR;
6249               else
6250                 {
6251                   this_eol = EOL_SEEN_CRLF;
6252                   src += 2;
6253                 }
6254
6255               if (eol_seen == EOL_SEEN_NONE)
6256                 /* This is the first end-of-line.  */
6257                 eol_seen = this_eol;
6258               else if (eol_seen != this_eol)
6259                 {
6260                   /* The found type is different from what found before.
6261                      Allow for stray ^M characters in DOS EOL files.  */
6262                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6263                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6264                     eol_seen = EOL_SEEN_CRLF;
6265                   else
6266                     {
6267                       eol_seen = EOL_SEEN_LF;
6268                       break;
6269                     }
6270                 }
6271               if (++total == MAX_EOL_CHECK_COUNT)
6272                 break;
6273             }
6274           src += 2;
6275         }
6276     }
6277   else
6278     {
6279       while (src < src_end)
6280         {
6281           c = *src++;
6282           if (c == '\n' || c == '\r')
6283             {
6284               int this_eol;
6285
6286               if (c == '\n')
6287                 this_eol = EOL_SEEN_LF;
6288               else if (src >= src_end || *src != '\n')
6289                 this_eol = EOL_SEEN_CR;
6290               else
6291                 this_eol = EOL_SEEN_CRLF, src++;
6292
6293               if (eol_seen == EOL_SEEN_NONE)
6294                 /* This is the first end-of-line.  */
6295                 eol_seen = this_eol;
6296               else if (eol_seen != this_eol)
6297                 {
6298                   /* The found type is different from what found before.
6299                      Allow for stray ^M characters in DOS EOL files.  */
6300                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6301                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6302                     eol_seen = EOL_SEEN_CRLF;
6303                   else
6304                     {
6305                       eol_seen = EOL_SEEN_LF;
6306                       break;
6307                     }
6308                 }
6309               if (++total == MAX_EOL_CHECK_COUNT)
6310                 break;
6311             }
6312         }
6313     }
6314   return eol_seen;
6315 }
6316
6317
6318 static Lisp_Object
6319 adjust_coding_eol_type (coding, eol_seen)
6320      struct coding_system *coding;
6321      int eol_seen;
6322 {
6323   Lisp_Object eol_type;
6324
6325   eol_type = CODING_ID_EOL_TYPE (coding->id);
6326   if (eol_seen & EOL_SEEN_LF)
6327     {
6328       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6329       eol_type = Qunix;
6330     }
6331   else if (eol_seen & EOL_SEEN_CRLF)
6332     {
6333       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6334       eol_type = Qdos;
6335     }
6336   else if (eol_seen & EOL_SEEN_CR)
6337     {
6338       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6339       eol_type = Qmac;
6340     }
6341   return eol_type;
6342 }
6343
6344 /* Detect how a text specified in CODING is encoded.  If a coding
6345    system is detected, update fields of CODING by the detected coding
6346    system.  */
6347
6348 void
6349 detect_coding (coding)
6350      struct coding_system *coding;
6351 {
6352   const unsigned char *src, *src_end;
6353   int saved_mode = coding->mode;
6354
6355   coding->consumed = coding->consumed_char = 0;
6356   coding->produced = coding->produced_char = 0;
6357   coding_set_source (coding);
6358
6359   src_end = coding->source + coding->src_bytes;
6360   coding->head_ascii = 0;
6361
6362   /* If we have not yet decided the text encoding type, detect it
6363      now.  */
6364   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6365     {
6366       int c, i;
6367       struct coding_detection_info detect_info;
6368       int null_byte_found = 0, eight_bit_found = 0;
6369
6370       detect_info.checked = detect_info.found = detect_info.rejected = 0;
6371       for (src = coding->source; src < src_end; src++)
6372         {
6373           c = *src;
6374           if (c & 0x80)
6375             {
6376               eight_bit_found = 1;
6377               if (null_byte_found)
6378                 break;
6379             }
6380           else if (c < 0x20)
6381             {
6382               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6383                   && ! inhibit_iso_escape_detection
6384                   && ! detect_info.checked)
6385                 {
6386                   if (detect_coding_iso_2022 (coding, &detect_info))
6387                     {
6388                       /* We have scanned the whole data.  */
6389                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6390                         {
6391                           /* We didn't find an 8-bit code.  We may
6392                              have found a null-byte, but it's very
6393                              rare that a binary file confirm to
6394                              ISO-2022.  */
6395                           src = src_end;
6396                           coding->head_ascii = src - coding->source;
6397                         }
6398                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6399                       break;
6400                     }
6401                 }
6402               else if (! c && !inhibit_null_byte_detection)
6403                 {
6404                   null_byte_found = 1;
6405                   if (eight_bit_found)
6406                     break;
6407                 }
6408               if (! eight_bit_found)
6409                 coding->head_ascii++;
6410             }
6411           else if (! eight_bit_found)
6412             coding->head_ascii++;
6413         }
6414
6415       if (null_byte_found || eight_bit_found
6416           || coding->head_ascii < coding->src_bytes
6417           || detect_info.found)
6418         {
6419           enum coding_category category;
6420           struct coding_system *this;
6421
6422           if (coding->head_ascii == coding->src_bytes)
6423             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6424             for (i = 0; i < coding_category_raw_text; i++)
6425               {
6426                 category = coding_priorities[i];
6427                 this = coding_categories + category;
6428                 if (detect_info.found & (1 << category))
6429                   break;
6430               }
6431           else
6432             {
6433               if (null_byte_found)
6434                 {
6435                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6436                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6437                 }
6438               for (i = 0; i < coding_category_raw_text; i++)
6439                 {
6440                   category = coding_priorities[i];
6441                   this = coding_categories + category;
6442                   if (this->id < 0)
6443                     {
6444                       /* No coding system of this category is defined.  */
6445                       detect_info.rejected |= (1 << category);
6446                     }
6447                   else if (category >= coding_category_raw_text)
6448                     continue;
6449                   else if (detect_info.checked & (1 << category))
6450                     {
6451                       if (detect_info.found & (1 << category))
6452                         break;
6453                     }
6454                   else if ((*(this->detector)) (coding, &detect_info)
6455                            && detect_info.found & (1 << category))
6456                     {
6457                       if (category == coding_category_utf_16_auto)
6458                         {
6459                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6460                             category = coding_category_utf_16_le;
6461                           else
6462                             category = coding_category_utf_16_be;
6463                         }
6464                       break;
6465                     }
6466                 }
6467             }
6468
6469           if (i < coding_category_raw_text)
6470             setup_coding_system (CODING_ID_NAME (this->id), coding);
6471           else if (null_byte_found)
6472             setup_coding_system (Qno_conversion, coding);
6473           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6474                    == CATEGORY_MASK_ANY)
6475             setup_coding_system (Qraw_text, coding);
6476           else if (detect_info.rejected)
6477             for (i = 0; i < coding_category_raw_text; i++)
6478               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6479                 {
6480                   this = coding_categories + coding_priorities[i];
6481                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6482                   break;
6483                 }
6484         }
6485     }
6486   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6487            == coding_category_utf_8_auto)
6488     {
6489       Lisp_Object coding_systems;
6490       struct coding_detection_info detect_info;
6491
6492       coding_systems
6493         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6494       detect_info.found = detect_info.rejected = 0;
6495       coding->head_ascii = 0;
6496       if (CONSP (coding_systems)
6497           && detect_coding_utf_8 (coding, &detect_info))
6498         {
6499           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6500             setup_coding_system (XCAR (coding_systems), coding);
6501           else
6502             setup_coding_system (XCDR (coding_systems), coding);
6503         }
6504     }
6505   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6506            == coding_category_utf_16_auto)
6507     {
6508       Lisp_Object coding_systems;
6509       struct coding_detection_info detect_info;
6510
6511       coding_systems
6512         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6513       detect_info.found = detect_info.rejected = 0;
6514       coding->head_ascii = 0;
6515       if (CONSP (coding_systems)
6516           && detect_coding_utf_16 (coding, &detect_info))
6517         {
6518           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6519             setup_coding_system (XCAR (coding_systems), coding);
6520           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6521             setup_coding_system (XCDR (coding_systems), coding);
6522         }
6523     }
6524   coding->mode = saved_mode;
6525 }
6526
6527
6528 static void
6529 decode_eol (coding)
6530      struct coding_system *coding;
6531 {
6532   Lisp_Object eol_type;
6533   unsigned char *p, *pbeg, *pend;
6534
6535   eol_type = CODING_ID_EOL_TYPE (coding->id);
6536   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6537     return;
6538
6539   if (NILP (coding->dst_object))
6540     pbeg = coding->destination;
6541   else
6542     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6543   pend = pbeg + coding->produced;
6544
6545   if (VECTORP (eol_type))
6546     {
6547       int eol_seen = EOL_SEEN_NONE;
6548
6549       for (p = pbeg; p < pend; p++)
6550         {
6551           if (*p == '\n')
6552             eol_seen |= EOL_SEEN_LF;
6553           else if (*p == '\r')
6554             {
6555               if (p + 1 < pend && *(p + 1) == '\n')
6556                 {
6557                   eol_seen |= EOL_SEEN_CRLF;
6558                   p++;
6559                 }
6560               else
6561                 eol_seen |= EOL_SEEN_CR;
6562             }
6563         }
6564       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6565       if ((eol_seen & EOL_SEEN_CRLF) != 0
6566           && (eol_seen & EOL_SEEN_CR) != 0
6567           && (eol_seen & EOL_SEEN_LF) == 0)
6568         eol_seen = EOL_SEEN_CRLF;
6569       else if (eol_seen != EOL_SEEN_NONE
6570           && eol_seen != EOL_SEEN_LF
6571           && eol_seen != EOL_SEEN_CRLF
6572           && eol_seen != EOL_SEEN_CR)
6573         eol_seen = EOL_SEEN_LF;
6574       if (eol_seen != EOL_SEEN_NONE)
6575         eol_type = adjust_coding_eol_type (coding, eol_seen);
6576     }
6577
6578   if (EQ (eol_type, Qmac))
6579     {
6580       for (p = pbeg; p < pend; p++)
6581         if (*p == '\r')
6582           *p = '\n';
6583     }
6584   else if (EQ (eol_type, Qdos))
6585     {
6586       int n = 0;
6587
6588       if (NILP (coding->dst_object))
6589         {
6590           /* Start deleting '\r' from the tail to minimize the memory
6591              movement.  */
6592           for (p = pend - 2; p >= pbeg; p--)
6593             if (*p == '\r')
6594               {
6595                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6596                 n++;
6597               }
6598         }
6599       else
6600         {
6601           int pos_byte = coding->dst_pos_byte;
6602           int pos = coding->dst_pos;
6603           int pos_end = pos + coding->produced_char - 1;
6604
6605           while (pos < pos_end)
6606             {
6607               p = BYTE_POS_ADDR (pos_byte);
6608               if (*p == '\r' && p[1] == '\n')
6609                 {
6610                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6611                   n++;
6612                   pos_end--;
6613                 }
6614               pos++;
6615               if (coding->dst_multibyte)
6616                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6617               else
6618                 pos_byte++;
6619             }
6620         }
6621       coding->produced -= n;
6622       coding->produced_char -= n;
6623     }
6624 }
6625
6626
6627 /* Return a translation table (or list of them) from coding system
6628    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6629    decoding (ENCODEP is zero). */
6630
6631 static Lisp_Object
6632 get_translation_table (attrs, encodep, max_lookup)
6633      Lisp_Object attrs;
6634      int encodep, *max_lookup;
6635 {
6636   Lisp_Object standard, translation_table;
6637   Lisp_Object val;
6638
6639   if (NILP (Venable_character_translation))
6640     {
6641       if (max_lookup)
6642         *max_lookup = 0;
6643       return Qnil;
6644     }
6645   if (encodep)
6646     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6647       standard = Vstandard_translation_table_for_encode;
6648   else
6649     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6650       standard = Vstandard_translation_table_for_decode;
6651   if (NILP (translation_table))
6652     translation_table = standard;
6653   else
6654     {
6655       if (SYMBOLP (translation_table))
6656         translation_table = Fget (translation_table, Qtranslation_table);
6657       else if (CONSP (translation_table))
6658         {
6659           translation_table = Fcopy_sequence (translation_table);
6660           for (val = translation_table; CONSP (val); val = XCDR (val))
6661             if (SYMBOLP (XCAR (val)))
6662               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6663         }
6664       if (CHAR_TABLE_P (standard))
6665         {
6666           if (CONSP (translation_table))
6667             translation_table = nconc2 (translation_table,
6668                                         Fcons (standard, Qnil));
6669           else
6670             translation_table = Fcons (translation_table,
6671                                        Fcons (standard, Qnil));
6672         }
6673     }
6674
6675   if (max_lookup)
6676     {
6677       *max_lookup = 1;
6678       if (CHAR_TABLE_P (translation_table)
6679           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6680         {
6681           val = XCHAR_TABLE (translation_table)->extras[1];
6682           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6683             *max_lookup = XFASTINT (val);
6684         }
6685       else if (CONSP (translation_table))
6686         {
6687           Lisp_Object tail, val;
6688
6689           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6690             if (CHAR_TABLE_P (XCAR (tail))
6691                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6692               {
6693                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6694                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6695                   *max_lookup = XFASTINT (val);
6696               }
6697         }
6698     }
6699   return translation_table;
6700 }
6701
6702 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
6703   do {                                                          \
6704     trans = Qnil;                                               \
6705     if (CHAR_TABLE_P (table))                                   \
6706       {                                                         \
6707         trans = CHAR_TABLE_REF (table, c);                      \
6708         if (CHARACTERP (trans))                                 \
6709           c = XFASTINT (trans), trans = Qnil;                   \
6710       }                                                         \
6711     else if (CONSP (table))                                     \
6712       {                                                         \
6713         Lisp_Object tail;                                       \
6714                                                                 \
6715         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
6716           if (CHAR_TABLE_P (XCAR (tail)))                       \
6717             {                                                   \
6718               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
6719               if (CHARACTERP (trans))                           \
6720                 c = XFASTINT (trans), trans = Qnil;             \
6721               else if (! NILP (trans))                          \
6722                 break;                                          \
6723             }                                                   \
6724       }                                                         \
6725   } while (0)
6726
6727
6728 /* Return a translation of character(s) at BUF according to TRANS.
6729    TRANS is TO-CHAR or ((FROM .  TO) ...) where
6730    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6731    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6732    translation is found, and Qnil if not found..
6733    If BUF is too short to lookup characters in FROM, return Qt.  */
6734
6735 static Lisp_Object
6736 get_translation (trans, buf, buf_end)
6737      Lisp_Object trans;
6738      int *buf, *buf_end;
6739 {
6740
6741   if (INTEGERP (trans))
6742     return trans;
6743   for (; CONSP (trans); trans = XCDR (trans))
6744     {
6745       Lisp_Object val = XCAR (trans);
6746       Lisp_Object from = XCAR (val);
6747       int len = ASIZE (from);
6748       int i;
6749
6750       for (i = 0; i < len; i++)
6751         {
6752           if (buf + i == buf_end)
6753             return Qt;
6754           if (XINT (AREF (from, i)) != buf[i])
6755             break;
6756         }
6757       if (i == len)
6758         return val;
6759     }
6760   return Qnil;
6761 }
6762
6763
6764 static int
6765 produce_chars (coding, translation_table, last_block)
6766      struct coding_system *coding;
6767      Lisp_Object translation_table;
6768      int last_block;
6769 {
6770   unsigned char *dst = coding->destination + coding->produced;
6771   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6772   EMACS_INT produced;
6773   EMACS_INT produced_chars = 0;
6774   int carryover = 0;
6775
6776   if (! coding->chars_at_source)
6777     {
6778       /* Source characters are in coding->charbuf.  */
6779       int *buf = coding->charbuf;
6780       int *buf_end = buf + coding->charbuf_used;
6781
6782       if (EQ (coding->src_object, coding->dst_object))
6783         {
6784           coding_set_source (coding);
6785           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6786         }
6787
6788       while (buf < buf_end)
6789         {
6790           int c = *buf, i;
6791
6792           if (c >= 0)
6793             {
6794               int from_nchars = 1, to_nchars = 1;
6795               Lisp_Object trans = Qnil;
6796
6797               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6798               if (! NILP (trans))
6799                 {
6800                   trans = get_translation (trans, buf, buf_end);
6801                   if (INTEGERP (trans))
6802                     c = XINT (trans);
6803                   else if (CONSP (trans))
6804                     {
6805                       from_nchars = ASIZE (XCAR (trans));
6806                       trans = XCDR (trans);
6807                       if (INTEGERP (trans))
6808                         c = XINT (trans);
6809                       else
6810                         {
6811                           to_nchars = ASIZE (trans);
6812                           c = XINT (AREF (trans, 0));
6813                         }
6814                     }
6815                   else if (EQ (trans, Qt) && ! last_block)
6816                     break;
6817                 }
6818
6819               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6820                 {
6821                   dst = alloc_destination (coding,
6822                                            buf_end - buf
6823                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6824                                            dst);
6825                   if (EQ (coding->src_object, coding->dst_object))
6826                     {
6827                       coding_set_source (coding);
6828                       dst_end = (((unsigned char *) coding->source)
6829                                  + coding->consumed);
6830                     }
6831                   else
6832                     dst_end = coding->destination + coding->dst_bytes;
6833                 }
6834
6835               for (i = 0; i < to_nchars; i++)
6836                 {
6837                   if (i > 0)
6838                     c = XINT (AREF (trans, i));
6839                   if (coding->dst_multibyte
6840                       || ! CHAR_BYTE8_P (c))
6841                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6842                   else
6843                     *dst++ = CHAR_TO_BYTE8 (c);
6844                 }
6845               produced_chars += to_nchars;
6846               buf += from_nchars;
6847             }
6848           else
6849             /* This is an annotation datum.  (-C) is the length.  */
6850             buf += -c;
6851         }
6852       carryover = buf_end - buf;
6853     }
6854   else
6855     {
6856       /* Source characters are at coding->source.  */
6857       const unsigned char *src = coding->source;
6858       const unsigned char *src_end = src + coding->consumed;
6859
6860       if (EQ (coding->dst_object, coding->src_object))
6861         dst_end = (unsigned char *) src;
6862       if (coding->src_multibyte != coding->dst_multibyte)
6863         {
6864           if (coding->src_multibyte)
6865             {
6866               int multibytep = 1;
6867               EMACS_INT consumed_chars = 0;
6868
6869               while (1)
6870                 {
6871                   const unsigned char *src_base = src;
6872                   int c;
6873
6874                   ONE_MORE_BYTE (c);
6875                   if (dst == dst_end)
6876                     {
6877                       if (EQ (coding->src_object, coding->dst_object))
6878                         dst_end = (unsigned char *) src;
6879                       if (dst == dst_end)
6880                         {
6881                           EMACS_INT offset = src - coding->source;
6882
6883                           dst = alloc_destination (coding, src_end - src + 1,
6884                                                    dst);
6885                           dst_end = coding->destination + coding->dst_bytes;
6886                           coding_set_source (coding);
6887                           src = coding->source + offset;
6888                           src_end = coding->source + coding->src_bytes;
6889                           if (EQ (coding->src_object, coding->dst_object))
6890                             dst_end = (unsigned char *) src;
6891                         }
6892                     }
6893                   *dst++ = c;
6894                   produced_chars++;
6895                 }
6896             no_more_source:
6897               ;
6898             }
6899           else
6900             while (src < src_end)
6901               {
6902                 int multibytep = 1;
6903                 int c = *src++;
6904
6905                 if (dst >= dst_end - 1)
6906                   {
6907                     if (EQ (coding->src_object, coding->dst_object))
6908                       dst_end = (unsigned char *) src;
6909                     if (dst >= dst_end - 1)
6910                       {
6911                         EMACS_INT offset = src - coding->source;
6912                         EMACS_INT more_bytes;
6913
6914                         if (EQ (coding->src_object, coding->dst_object))
6915                           more_bytes = ((src_end - src) / 2) + 2;
6916                         else
6917                           more_bytes = src_end - src + 2;
6918                         dst = alloc_destination (coding, more_bytes, dst);
6919                         dst_end = coding->destination + coding->dst_bytes;
6920                         coding_set_source (coding);
6921                         src = coding->source + offset;
6922                         src_end = coding->source + coding->src_bytes;
6923                         if (EQ (coding->src_object, coding->dst_object))
6924                           dst_end = (unsigned char *) src;
6925                       }
6926                   }
6927                 EMIT_ONE_BYTE (c);
6928               }
6929         }
6930       else
6931         {
6932           if (!EQ (coding->src_object, coding->dst_object))
6933             {
6934               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6935
6936               if (require > 0)
6937                 {
6938                   EMACS_INT offset = src - coding->source;
6939
6940                   dst = alloc_destination (coding, require, dst);
6941                   coding_set_source (coding);
6942                   src = coding->source + offset;
6943                   src_end = coding->source + coding->src_bytes;
6944                 }
6945             }
6946           produced_chars = coding->consumed_char;
6947           while (src < src_end)
6948             *dst++ = *src++;
6949         }
6950     }
6951
6952   produced = dst - (coding->destination + coding->produced);
6953   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6954     insert_from_gap (produced_chars, produced);
6955   coding->produced += produced;
6956   coding->produced_char += produced_chars;
6957   return carryover;
6958 }
6959
6960 /* Compose text in CODING->object according to the annotation data at
6961    CHARBUF.  CHARBUF is an array:
6962      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6963  */
6964
6965 static INLINE void
6966 produce_composition (coding, charbuf, pos)
6967      struct coding_system *coding;
6968      int *charbuf;
6969      EMACS_INT pos;
6970 {
6971   int len;
6972   EMACS_INT to;
6973   enum composition_method method;
6974   Lisp_Object components;
6975
6976   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6977   to = pos + charbuf[2];
6978   method = (enum composition_method) (charbuf[4]);
6979
6980   if (method == COMPOSITION_RELATIVE)
6981     components = Qnil;
6982   else
6983     {
6984       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6985       int i, j;
6986
6987       if (method == COMPOSITION_WITH_RULE)
6988         len = charbuf[2] * 3 - 2;
6989       charbuf += MAX_ANNOTATION_LENGTH;
6990       /* charbuf = [ CHRA ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6991       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6992         {
6993           if (charbuf[i] >= 0)
6994             args[j] = make_number (charbuf[i]);
6995           else
6996             {
6997               i++;
6998               args[j] = make_number (charbuf[i] % 0x100);
6999             }
7000         }
7001       components = (i == j ? Fstring (j, args) : Fvector (j, args));
7002     }
7003   compose_text (pos, to, components, Qnil, coding->dst_object);
7004 }
7005
7006
7007 /* Put `charset' property on text in CODING->object according to
7008    the annotation data at CHARBUF.  CHARBUF is an array:
7009      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
7010  */
7011
7012 static INLINE void
7013 produce_charset (coding, charbuf, pos)
7014      struct coding_system *coding;
7015      int *charbuf;
7016      EMACS_INT pos;
7017 {
7018   EMACS_INT from = pos - charbuf[2];
7019   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
7020
7021   Fput_text_property (make_number (from), make_number (pos),
7022                       Qcharset, CHARSET_NAME (charset),
7023                       coding->dst_object);
7024 }
7025
7026
7027 #define CHARBUF_SIZE 0x4000
7028
7029 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
7030   do {                                                                  \
7031     int size = CHARBUF_SIZE;                                            \
7032                                                                         \
7033     coding->charbuf = NULL;                                             \
7034     while (size > 1024)                                                 \
7035       {                                                                 \
7036         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
7037         if (coding->charbuf)                                            \
7038           break;                                                        \
7039         size >>= 1;                                                     \
7040       }                                                                 \
7041     if (! coding->charbuf)                                              \
7042       {                                                                 \
7043         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
7044         return coding->result;                                          \
7045       }                                                                 \
7046     coding->charbuf_size = size;                                        \
7047   } while (0)
7048
7049
7050 static void
7051 produce_annotation (coding, pos)
7052      struct coding_system *coding;
7053      EMACS_INT pos;
7054 {
7055   int *charbuf = coding->charbuf;
7056   int *charbuf_end = charbuf + coding->charbuf_used;
7057
7058   if (NILP (coding->dst_object))
7059     return;
7060
7061   while (charbuf < charbuf_end)
7062     {
7063       if (*charbuf >= 0)
7064         pos++, charbuf++;
7065       else
7066         {
7067           int len = -*charbuf;
7068
7069           if (len > 2)
7070             switch (charbuf[1])
7071               {
7072               case CODING_ANNOTATE_COMPOSITION_MASK:
7073                 produce_composition (coding, charbuf, pos);
7074                 break;
7075               case CODING_ANNOTATE_CHARSET_MASK:
7076                 produce_charset (coding, charbuf, pos);
7077                 break;
7078               }
7079           charbuf += len;
7080         }
7081     }
7082 }
7083
7084 /* Decode the data at CODING->src_object into CODING->dst_object.
7085    CODING->src_object is a buffer, a string, or nil.
7086    CODING->dst_object is a buffer.
7087
7088    If CODING->src_object is a buffer, it must be the current buffer.
7089    In this case, if CODING->src_pos is positive, it is a position of
7090    the source text in the buffer, otherwise, the source text is in the
7091    gap area of the buffer, and CODING->src_pos specifies the offset of
7092    the text from GPT (which must be the same as PT).  If this is the
7093    same buffer as CODING->dst_object, CODING->src_pos must be
7094    negative.
7095
7096    If CODING->src_object is a string, CODING->src_pos is an index to
7097    that string.
7098
7099    If CODING->src_object is nil, CODING->source must already point to
7100    the non-relocatable memory area.  In this case, CODING->src_pos is
7101    an offset from CODING->source.
7102
7103    The decoded data is inserted at the current point of the buffer
7104    CODING->dst_object.
7105 */
7106
7107 static int
7108 decode_coding (coding)
7109      struct coding_system *coding;
7110 {
7111   Lisp_Object attrs;
7112   Lisp_Object undo_list;
7113   Lisp_Object translation_table;
7114   int carryover;
7115   int i;
7116
7117   if (BUFFERP (coding->src_object)
7118       && coding->src_pos > 0
7119       && coding->src_pos < GPT
7120       && coding->src_pos + coding->src_chars > GPT)
7121     move_gap_both (coding->src_pos, coding->src_pos_byte);
7122
7123   undo_list = Qt;
7124   if (BUFFERP (coding->dst_object))
7125     {
7126       if (current_buffer != XBUFFER (coding->dst_object))
7127         set_buffer_internal (XBUFFER (coding->dst_object));
7128       if (GPT != PT)
7129         move_gap_both (PT, PT_BYTE);
7130       undo_list = current_buffer->undo_list;
7131       current_buffer->undo_list = Qt;
7132     }
7133
7134   coding->consumed = coding->consumed_char = 0;
7135   coding->produced = coding->produced_char = 0;
7136   coding->chars_at_source = 0;
7137   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7138   coding->errors = 0;
7139
7140   ALLOC_CONVERSION_WORK_AREA (coding);
7141
7142   attrs = CODING_ID_ATTRS (coding->id);
7143   translation_table = get_translation_table (attrs, 0, NULL);
7144
7145   carryover = 0;
7146   do
7147     {
7148       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7149
7150       coding_set_source (coding);
7151       coding->annotated = 0;
7152       coding->charbuf_used = carryover;
7153       (*(coding->decoder)) (coding);
7154       coding_set_destination (coding);
7155       carryover = produce_chars (coding, translation_table, 0);
7156       if (coding->annotated)
7157         produce_annotation (coding, pos);
7158       for (i = 0; i < carryover; i++)
7159         coding->charbuf[i]
7160           = coding->charbuf[coding->charbuf_used - carryover + i];
7161     }
7162   while (coding->consumed < coding->src_bytes
7163          && (coding->result == CODING_RESULT_SUCCESS
7164              || coding->result == CODING_RESULT_INVALID_SRC));
7165
7166   if (carryover > 0)
7167     {
7168       coding_set_destination (coding);
7169       coding->charbuf_used = carryover;
7170       produce_chars (coding, translation_table, 1);
7171     }
7172
7173   coding->carryover_bytes = 0;
7174   if (coding->consumed < coding->src_bytes)
7175     {
7176       int nbytes = coding->src_bytes - coding->consumed;
7177       const unsigned char *src;
7178
7179       coding_set_source (coding);
7180       coding_set_destination (coding);
7181       src = coding->source + coding->consumed;
7182
7183       if (coding->mode & CODING_MODE_LAST_BLOCK)
7184         {
7185           /* Flush out unprocessed data as binary chars.  We are sure
7186              that the number of data is less than the size of
7187              coding->charbuf.  */
7188           coding->charbuf_used = 0;
7189           coding->chars_at_source = 0;
7190
7191           while (nbytes-- > 0)
7192             {
7193               int c = *src++;
7194
7195               if (c & 0x80)
7196                 c = BYTE8_TO_CHAR (c);
7197               coding->charbuf[coding->charbuf_used++] = c;
7198             }
7199           produce_chars (coding, Qnil, 1);
7200         }
7201       else
7202         {
7203           /* Record unprocessed bytes in coding->carryover.  We are
7204              sure that the number of data is less than the size of
7205              coding->carryover.  */
7206           unsigned char *p = coding->carryover;
7207
7208           if (nbytes > sizeof coding->carryover)
7209             nbytes = sizeof coding->carryover;
7210           coding->carryover_bytes = nbytes;
7211           while (nbytes-- > 0)
7212             *p++ = *src++;
7213         }
7214       coding->consumed = coding->src_bytes;
7215     }
7216
7217   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7218       && !inhibit_eol_conversion)
7219     decode_eol (coding);
7220   if (BUFFERP (coding->dst_object))
7221     {
7222       current_buffer->undo_list = undo_list;
7223       record_insert (coding->dst_pos, coding->produced_char);
7224     }
7225   return coding->result;
7226 }
7227
7228
7229 /* Extract an annotation datum from a composition starting at POS and
7230    ending before LIMIT of CODING->src_object (buffer or string), store
7231    the data in BUF, set *STOP to a starting position of the next
7232    composition (if any) or to LIMIT, and return the address of the
7233    next element of BUF.
7234
7235    If such an annotation is not found, set *STOP to a starting
7236    position of a composition after POS (if any) or to LIMIT, and
7237    return BUF.  */
7238
7239 static INLINE int *
7240 handle_composition_annotation (pos, limit, coding, buf, stop)
7241      EMACS_INT pos, limit;
7242      struct coding_system *coding;
7243      int *buf;
7244      EMACS_INT *stop;
7245 {
7246   EMACS_INT start, end;
7247   Lisp_Object prop;
7248
7249   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7250       || end > limit)
7251     *stop = limit;
7252   else if (start > pos)
7253     *stop = start;
7254   else
7255     {
7256       if (start == pos)
7257         {
7258           /* We found a composition.  Store the corresponding
7259              annotation data in BUF.  */
7260           int *head = buf;
7261           enum composition_method method = COMPOSITION_METHOD (prop);
7262           int nchars = COMPOSITION_LENGTH (prop);
7263
7264           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7265           if (method != COMPOSITION_RELATIVE)
7266             {
7267               Lisp_Object components;
7268               int len, i, i_byte;
7269
7270               components = COMPOSITION_COMPONENTS (prop);
7271               if (VECTORP (components))
7272                 {
7273                   len = XVECTOR (components)->size;
7274                   for (i = 0; i < len; i++)
7275                     *buf++ = XINT (AREF (components, i));
7276                 }
7277               else if (STRINGP (components))
7278                 {
7279                   len = SCHARS (components);
7280                   i = i_byte = 0;
7281                   while (i < len)
7282                     {
7283                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7284                       buf++;
7285                     }
7286                 }
7287               else if (INTEGERP (components))
7288                 {
7289                   len = 1;
7290                   *buf++ = XINT (components);
7291                 }
7292               else if (CONSP (components))
7293                 {
7294                   for (len = 0; CONSP (components);
7295                        len++, components = XCDR (components))
7296                     *buf++ = XINT (XCAR (components));
7297                 }
7298               else
7299                 abort ();
7300               *head -= len;
7301             }
7302         }
7303
7304       if (find_composition (end, limit, &start, &end, &prop,
7305                             coding->src_object)
7306           && end <= limit)
7307         *stop = start;
7308       else
7309         *stop = limit;
7310     }
7311   return buf;
7312 }
7313
7314
7315 /* Extract an annotation datum from a text property `charset' at POS of
7316    CODING->src_object (buffer of string), store the data in BUF, set
7317    *STOP to the position where the value of `charset' property changes
7318    (limiting by LIMIT), and return the address of the next element of
7319    BUF.
7320
7321    If the property value is nil, set *STOP to the position where the
7322    property value is non-nil (limiting by LIMIT), and return BUF.  */
7323
7324 static INLINE int *
7325 handle_charset_annotation (pos, limit, coding, buf, stop)
7326      EMACS_INT pos, limit;
7327      struct coding_system *coding;
7328      int *buf;
7329      EMACS_INT *stop;
7330 {
7331   Lisp_Object val, next;
7332   int id;
7333
7334   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7335   if (! NILP (val) && CHARSETP (val))
7336     id = XINT (CHARSET_SYMBOL_ID (val));
7337   else
7338     id = -1;
7339   ADD_CHARSET_DATA (buf, 0, id);
7340   next = Fnext_single_property_change (make_number (pos), Qcharset,
7341                                        coding->src_object,
7342                                        make_number (limit));
7343   *stop = XINT (next);
7344   return buf;
7345 }
7346
7347
7348 static void
7349 consume_chars (coding, translation_table, max_lookup)
7350      struct coding_system *coding;
7351      Lisp_Object translation_table;
7352      int max_lookup;
7353 {
7354   int *buf = coding->charbuf;
7355   int *buf_end = coding->charbuf + coding->charbuf_size;
7356   const unsigned char *src = coding->source + coding->consumed;
7357   const unsigned char *src_end = coding->source + coding->src_bytes;
7358   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7359   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7360   int multibytep = coding->src_multibyte;
7361   Lisp_Object eol_type;
7362   int c;
7363   EMACS_INT stop, stop_composition, stop_charset;
7364   int *lookup_buf = NULL;
7365
7366   if (! NILP (translation_table))
7367     lookup_buf = alloca (sizeof (int) * max_lookup);
7368
7369   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7370   if (VECTORP (eol_type))
7371     eol_type = Qunix;
7372
7373   /* Note: composition handling is not yet implemented.  */
7374   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7375
7376   if (NILP (coding->src_object))
7377     stop = stop_composition = stop_charset = end_pos;
7378   else
7379     {
7380       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7381         stop = stop_composition = pos;
7382       else
7383         stop = stop_composition = end_pos;
7384       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7385         stop = stop_charset = pos;
7386       else
7387         stop_charset = end_pos;
7388     }
7389
7390   /* Compensate for CRLF and conversion.  */
7391   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7392   while (buf < buf_end)
7393     {
7394       Lisp_Object trans;
7395
7396       if (pos == stop)
7397         {
7398           if (pos == end_pos)
7399             break;
7400           if (pos == stop_composition)
7401             buf = handle_composition_annotation (pos, end_pos, coding,
7402                                                  buf, &stop_composition);
7403           if (pos == stop_charset)
7404             buf = handle_charset_annotation (pos, end_pos, coding,
7405                                              buf, &stop_charset);
7406           stop = (stop_composition < stop_charset
7407                   ? stop_composition : stop_charset);
7408         }
7409
7410       if (! multibytep)
7411         {
7412           EMACS_INT bytes;
7413
7414           if (coding->encoder == encode_coding_raw_text)
7415             c = *src++, pos++;
7416           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7417             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7418           else
7419             c = BYTE8_TO_CHAR (*src), src++, pos++;
7420         }
7421       else
7422         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7423       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7424         c = '\n';
7425       if (! EQ (eol_type, Qunix))
7426         {
7427           if (c == '\n')
7428             {
7429               if (EQ (eol_type, Qdos))
7430                 *buf++ = '\r';
7431               else
7432                 c = '\r';
7433             }
7434         }
7435
7436       trans = Qnil;
7437       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7438       if (NILP (trans))
7439         *buf++ = c;
7440       else
7441         {
7442           int from_nchars = 1, to_nchars = 1;
7443           int *lookup_buf_end;
7444           const unsigned char *p = src;
7445           int i;
7446
7447           lookup_buf[0] = c;
7448           for (i = 1; i < max_lookup && p < src_end; i++)
7449             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7450           lookup_buf_end = lookup_buf + i;
7451           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7452           if (INTEGERP (trans))
7453             c = XINT (trans);
7454           else if (CONSP (trans))
7455             {
7456               from_nchars = ASIZE (XCAR (trans));
7457               trans = XCDR (trans);
7458               if (INTEGERP (trans))
7459                 c = XINT (trans);
7460               else
7461                 {
7462                   to_nchars = ASIZE (trans);
7463                   if (buf + to_nchars > buf_end)
7464                     break;
7465                   c = XINT (AREF (trans, 0));
7466                 }
7467             }
7468           else
7469             break;
7470           *buf++ = c;
7471           for (i = 1; i < to_nchars; i++)
7472             *buf++ = XINT (AREF (trans, i));
7473           for (i = 1; i < from_nchars; i++, pos++)
7474             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7475         }
7476     }
7477
7478   coding->consumed = src - coding->source;
7479   coding->consumed_char = pos - coding->src_pos;
7480   coding->charbuf_used = buf - coding->charbuf;
7481   coding->chars_at_source = 0;
7482 }
7483
7484
7485 /* Encode the text at CODING->src_object into CODING->dst_object.
7486    CODING->src_object is a buffer or a string.
7487    CODING->dst_object is a buffer or nil.
7488
7489    If CODING->src_object is a buffer, it must be the current buffer.
7490    In this case, if CODING->src_pos is positive, it is a position of
7491    the source text in the buffer, otherwise. the source text is in the
7492    gap area of the buffer, and coding->src_pos specifies the offset of
7493    the text from GPT (which must be the same as PT).  If this is the
7494    same buffer as CODING->dst_object, CODING->src_pos must be
7495    negative and CODING should not have `pre-write-conversion'.
7496
7497    If CODING->src_object is a string, CODING should not have
7498    `pre-write-conversion'.
7499
7500    If CODING->dst_object is a buffer, the encoded data is inserted at
7501    the current point of that buffer.
7502
7503    If CODING->dst_object is nil, the encoded data is placed at the
7504    memory area specified by CODING->destination.  */
7505
7506 static int
7507 encode_coding (coding)
7508      struct coding_system *coding;
7509 {
7510   Lisp_Object attrs;
7511   Lisp_Object translation_table;
7512   int max_lookup;
7513
7514   attrs = CODING_ID_ATTRS (coding->id);
7515   if (coding->encoder == encode_coding_raw_text)
7516     translation_table = Qnil, max_lookup = 0;
7517   else
7518     translation_table = get_translation_table (attrs, 1, &max_lookup);
7519
7520   if (BUFFERP (coding->dst_object))
7521     {
7522       set_buffer_internal (XBUFFER (coding->dst_object));
7523       coding->dst_multibyte
7524         = ! NILP (current_buffer->enable_multibyte_characters);
7525     }
7526
7527   coding->consumed = coding->consumed_char = 0;
7528   coding->produced = coding->produced_char = 0;
7529   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7530   coding->errors = 0;
7531
7532   ALLOC_CONVERSION_WORK_AREA (coding);
7533
7534   do {
7535     coding_set_source (coding);
7536     consume_chars (coding, translation_table, max_lookup);
7537     coding_set_destination (coding);
7538     (*(coding->encoder)) (coding);
7539   } while (coding->consumed_char < coding->src_chars);
7540
7541   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7542     insert_from_gap (coding->produced_char, coding->produced);
7543
7544   return (coding->result);
7545 }
7546
7547
7548 /* Name (or base name) of work buffer for code conversion.  */
7549 static Lisp_Object Vcode_conversion_workbuf_name;
7550
7551 /* A working buffer used by the top level conversion.  Once it is
7552    created, it is never destroyed.  It has the name
7553    Vcode_conversion_workbuf_name.  The other working buffers are
7554    destroyed after the use is finished, and their names are modified
7555    versions of Vcode_conversion_workbuf_name.  */
7556 static Lisp_Object Vcode_conversion_reused_workbuf;
7557
7558 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
7559 static int reused_workbuf_in_use;
7560
7561
7562 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7563    multibyteness of returning buffer.  */
7564
7565 static Lisp_Object
7566 make_conversion_work_buffer (multibyte)
7567      int multibyte;
7568 {
7569   Lisp_Object name, workbuf;
7570   struct buffer *current;
7571
7572   if (reused_workbuf_in_use++)
7573     {
7574       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7575       workbuf = Fget_buffer_create (name);
7576     }
7577   else
7578     {
7579       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7580         Vcode_conversion_reused_workbuf
7581           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7582       workbuf = Vcode_conversion_reused_workbuf;
7583     }
7584   current = current_buffer;
7585   set_buffer_internal (XBUFFER (workbuf));
7586   /* We can't allow modification hooks to run in the work buffer.  For
7587      instance, directory_files_internal assumes that file decoding
7588      doesn't compile new regexps.  */
7589   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7590   Ferase_buffer ();
7591   current_buffer->undo_list = Qt;
7592   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7593   set_buffer_internal (current);
7594   return workbuf;
7595 }
7596
7597
7598 static Lisp_Object
7599 code_conversion_restore (arg)
7600      Lisp_Object arg;
7601 {
7602   Lisp_Object current, workbuf;
7603   struct gcpro gcpro1;
7604
7605   GCPRO1 (arg);
7606   current = XCAR (arg);
7607   workbuf = XCDR (arg);
7608   if (! NILP (workbuf))
7609     {
7610       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7611         reused_workbuf_in_use = 0;
7612       else if (! NILP (Fbuffer_live_p (workbuf)))
7613         Fkill_buffer (workbuf);
7614     }
7615   set_buffer_internal (XBUFFER (current));
7616   UNGCPRO;
7617   return Qnil;
7618 }
7619
7620 Lisp_Object
7621 code_conversion_save (with_work_buf, multibyte)
7622      int with_work_buf, multibyte;
7623 {
7624   Lisp_Object workbuf = Qnil;
7625
7626   if (with_work_buf)
7627     workbuf = make_conversion_work_buffer (multibyte);
7628   record_unwind_protect (code_conversion_restore,
7629                          Fcons (Fcurrent_buffer (), workbuf));
7630   return workbuf;
7631 }
7632
7633 int
7634 decode_coding_gap (coding, chars, bytes)
7635      struct coding_system *coding;
7636      EMACS_INT chars, bytes;
7637 {
7638   int count = specpdl_ptr - specpdl;
7639   Lisp_Object attrs;
7640
7641   code_conversion_save (0, 0);
7642
7643   coding->src_object = Fcurrent_buffer ();
7644   coding->src_chars = chars;
7645   coding->src_bytes = bytes;
7646   coding->src_pos = -chars;
7647   coding->src_pos_byte = -bytes;
7648   coding->src_multibyte = chars < bytes;
7649   coding->dst_object = coding->src_object;
7650   coding->dst_pos = PT;
7651   coding->dst_pos_byte = PT_BYTE;
7652   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7653
7654   if (CODING_REQUIRE_DETECTION (coding))
7655     detect_coding (coding);
7656
7657   coding->mode |= CODING_MODE_LAST_BLOCK;
7658   current_buffer->text->inhibit_shrinking = 1;
7659   decode_coding (coding);
7660   current_buffer->text->inhibit_shrinking = 0;
7661
7662   attrs = CODING_ID_ATTRS (coding->id);
7663   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7664     {
7665       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7666       Lisp_Object val;
7667
7668       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7669       val = call1 (CODING_ATTR_POST_READ (attrs),
7670                    make_number (coding->produced_char));
7671       CHECK_NATNUM (val);
7672       coding->produced_char += Z - prev_Z;
7673       coding->produced += Z_BYTE - prev_Z_BYTE;
7674     }
7675
7676   unbind_to (count, Qnil);
7677   return coding->result;
7678 }
7679
7680 int
7681 encode_coding_gap (coding, chars, bytes)
7682      struct coding_system *coding;
7683      EMACS_INT chars, bytes;
7684 {
7685   int count = specpdl_ptr - specpdl;
7686
7687   code_conversion_save (0, 0);
7688
7689   coding->src_object = Fcurrent_buffer ();
7690   coding->src_chars = chars;
7691   coding->src_bytes = bytes;
7692   coding->src_pos = -chars;
7693   coding->src_pos_byte = -bytes;
7694   coding->src_multibyte = chars < bytes;
7695   coding->dst_object = coding->src_object;
7696   coding->dst_pos = PT;
7697   coding->dst_pos_byte = PT_BYTE;
7698
7699   encode_coding (coding);
7700
7701   unbind_to (count, Qnil);
7702   return coding->result;
7703 }
7704
7705
7706 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7707    SRC_OBJECT into DST_OBJECT by coding context CODING.
7708
7709    SRC_OBJECT is a buffer, a string, or Qnil.
7710
7711    If it is a buffer, the text is at point of the buffer.  FROM and TO
7712    are positions in the buffer.
7713
7714    If it is a string, the text is at the beginning of the string.
7715    FROM and TO are indices to the string.
7716
7717    If it is nil, the text is at coding->source.  FROM and TO are
7718    indices to coding->source.
7719
7720    DST_OBJECT is a buffer, Qt, or Qnil.
7721
7722    If it is a buffer, the decoded text is inserted at point of the
7723    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7724    is deleted.
7725
7726    If it is Qt, a string is made from the decoded text, and
7727    set in CODING->dst_object.
7728
7729    If it is Qnil, the decoded text is stored at CODING->destination.
7730    The caller must allocate CODING->dst_bytes bytes at
7731    CODING->destination by xmalloc.  If the decoded text is longer than
7732    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7733  */
7734
7735 void
7736 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7737                       dst_object)
7738      struct coding_system *coding;
7739      Lisp_Object src_object;
7740      EMACS_INT from, from_byte, to, to_byte;
7741      Lisp_Object dst_object;
7742 {
7743   int count = specpdl_ptr - specpdl;
7744   unsigned char *destination;
7745   EMACS_INT dst_bytes;
7746   EMACS_INT chars = to - from;
7747   EMACS_INT bytes = to_byte - from_byte;
7748   Lisp_Object attrs;
7749   int saved_pt = -1, saved_pt_byte;
7750   int need_marker_adjustment = 0;
7751   Lisp_Object old_deactivate_mark;
7752
7753   old_deactivate_mark = Vdeactivate_mark;
7754
7755   if (NILP (dst_object))
7756     {
7757       destination = coding->destination;
7758       dst_bytes = coding->dst_bytes;
7759     }
7760
7761   coding->src_object = src_object;
7762   coding->src_chars = chars;
7763   coding->src_bytes = bytes;
7764   coding->src_multibyte = chars < bytes;
7765
7766   if (STRINGP (src_object))
7767     {
7768       coding->src_pos = from;
7769       coding->src_pos_byte = from_byte;
7770     }
7771   else if (BUFFERP (src_object))
7772     {
7773       set_buffer_internal (XBUFFER (src_object));
7774       if (from != GPT)
7775         move_gap_both (from, from_byte);
7776       if (EQ (src_object, dst_object))
7777         {
7778           struct Lisp_Marker *tail;
7779
7780           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7781             {
7782               tail->need_adjustment
7783                 = tail->charpos == (tail->insertion_type ? from : to);
7784               need_marker_adjustment |= tail->need_adjustment;
7785             }
7786           saved_pt = PT, saved_pt_byte = PT_BYTE;
7787           TEMP_SET_PT_BOTH (from, from_byte);
7788           current_buffer->text->inhibit_shrinking = 1;
7789           del_range_both (from, from_byte, to, to_byte, 1);
7790           coding->src_pos = -chars;
7791           coding->src_pos_byte = -bytes;
7792         }
7793       else
7794         {
7795           coding->src_pos = from;
7796           coding->src_pos_byte = from_byte;
7797         }
7798     }
7799
7800   if (CODING_REQUIRE_DETECTION (coding))
7801     detect_coding (coding);
7802   attrs = CODING_ID_ATTRS (coding->id);
7803
7804   if (EQ (dst_object, Qt)
7805       || (! NILP (CODING_ATTR_POST_READ (attrs))
7806           && NILP (dst_object)))
7807     {
7808       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7809       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7810       coding->dst_pos = BEG;
7811       coding->dst_pos_byte = BEG_BYTE;
7812     }
7813   else if (BUFFERP (dst_object))
7814     {
7815       code_conversion_save (0, 0);
7816       coding->dst_object = dst_object;
7817       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7818       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7819       coding->dst_multibyte
7820         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7821     }
7822   else
7823     {
7824       code_conversion_save (0, 0);
7825       coding->dst_object = Qnil;
7826       /* Most callers presume this will return a multibyte result, and they
7827          won't use `binary' or `raw-text' anyway, so let's not worry about
7828          CODING_FOR_UNIBYTE.  */
7829       coding->dst_multibyte = 1;
7830     }
7831
7832   decode_coding (coding);
7833
7834   if (BUFFERP (coding->dst_object))
7835     set_buffer_internal (XBUFFER (coding->dst_object));
7836
7837   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7838     {
7839       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7840       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7841       Lisp_Object val;
7842
7843       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7844       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7845               old_deactivate_mark);
7846       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7847                         make_number (coding->produced_char));
7848       UNGCPRO;
7849       CHECK_NATNUM (val);
7850       coding->produced_char += Z - prev_Z;
7851       coding->produced += Z_BYTE - prev_Z_BYTE;
7852     }
7853
7854   if (EQ (dst_object, Qt))
7855     {
7856       coding->dst_object = Fbuffer_string ();
7857     }
7858   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7859     {
7860       set_buffer_internal (XBUFFER (coding->dst_object));
7861       if (dst_bytes < coding->produced)
7862         {
7863           destination = xrealloc (destination, coding->produced);
7864           if (! destination)
7865             {
7866               record_conversion_result (coding,
7867                                         CODING_RESULT_INSUFFICIENT_DST);
7868               unbind_to (count, Qnil);
7869               return;
7870             }
7871           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7872             move_gap_both (BEGV, BEGV_BYTE);
7873           bcopy (BEGV_ADDR, destination, coding->produced);
7874           coding->destination = destination;
7875         }
7876     }
7877
7878   if (saved_pt >= 0)
7879     {
7880       /* This is the case of:
7881          (BUFFERP (src_object) && EQ (src_object, dst_object))
7882          As we have moved PT while replacing the original buffer
7883          contents, we must recover it now.  */
7884       set_buffer_internal (XBUFFER (src_object));
7885       current_buffer->text->inhibit_shrinking = 0;
7886       if (saved_pt < from)
7887         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7888       else if (saved_pt < from + chars)
7889         TEMP_SET_PT_BOTH (from, from_byte);
7890       else if (! NILP (current_buffer->enable_multibyte_characters))
7891         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7892                           saved_pt_byte + (coding->produced - bytes));
7893       else
7894         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7895                           saved_pt_byte + (coding->produced - bytes));
7896
7897       if (need_marker_adjustment)
7898         {
7899           struct Lisp_Marker *tail;
7900
7901           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7902             if (tail->need_adjustment)
7903               {
7904                 tail->need_adjustment = 0;
7905                 if (tail->insertion_type)
7906                   {
7907                     tail->bytepos = from_byte;
7908                     tail->charpos = from;
7909                   }
7910                 else
7911                   {
7912                     tail->bytepos = from_byte + coding->produced;
7913                     tail->charpos
7914                       = (NILP (current_buffer->enable_multibyte_characters)
7915                          ? tail->bytepos : from + coding->produced_char);
7916                   }
7917               }
7918         }
7919     }
7920
7921   Vdeactivate_mark = old_deactivate_mark;
7922   unbind_to (count, coding->dst_object);
7923 }
7924
7925
7926 void
7927 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7928                       dst_object)
7929      struct coding_system *coding;
7930      Lisp_Object src_object;
7931      EMACS_INT from, from_byte, to, to_byte;
7932      Lisp_Object dst_object;
7933 {
7934   int count = specpdl_ptr - specpdl;
7935   EMACS_INT chars = to - from;
7936   EMACS_INT bytes = to_byte - from_byte;
7937   Lisp_Object attrs;
7938   int saved_pt = -1, saved_pt_byte;
7939   int need_marker_adjustment = 0;
7940   int kill_src_buffer = 0;
7941   Lisp_Object old_deactivate_mark;
7942
7943   old_deactivate_mark = Vdeactivate_mark;
7944
7945   coding->src_object = src_object;
7946   coding->src_chars = chars;
7947   coding->src_bytes = bytes;
7948   coding->src_multibyte = chars < bytes;
7949
7950   attrs = CODING_ID_ATTRS (coding->id);
7951
7952   if (EQ (src_object, dst_object))
7953     {
7954       struct Lisp_Marker *tail;
7955
7956       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7957         {
7958           tail->need_adjustment
7959             = tail->charpos == (tail->insertion_type ? from : to);
7960           need_marker_adjustment |= tail->need_adjustment;
7961         }
7962     }
7963
7964   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7965     {
7966       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7967       set_buffer_internal (XBUFFER (coding->src_object));
7968       if (STRINGP (src_object))
7969         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7970       else if (BUFFERP (src_object))
7971         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7972       else
7973         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7974
7975       if (EQ (src_object, dst_object))
7976         {
7977           set_buffer_internal (XBUFFER (src_object));
7978           saved_pt = PT, saved_pt_byte = PT_BYTE;
7979           del_range_both (from, from_byte, to, to_byte, 1);
7980           set_buffer_internal (XBUFFER (coding->src_object));
7981         }
7982
7983       {
7984         Lisp_Object args[3];
7985         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7986
7987         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7988                 old_deactivate_mark);
7989         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7990         args[1] = make_number (BEG);
7991         args[2] = make_number (Z);
7992         safe_call (3, args);
7993         UNGCPRO;
7994       }
7995       if (XBUFFER (coding->src_object) != current_buffer)
7996         kill_src_buffer = 1;
7997       coding->src_object = Fcurrent_buffer ();
7998       if (BEG != GPT)
7999         move_gap_both (BEG, BEG_BYTE);
8000       coding->src_chars = Z - BEG;
8001       coding->src_bytes = Z_BYTE - BEG_BYTE;
8002       coding->src_pos = BEG;
8003       coding->src_pos_byte = BEG_BYTE;
8004       coding->src_multibyte = Z < Z_BYTE;
8005     }
8006   else if (STRINGP (src_object))
8007     {
8008       code_conversion_save (0, 0);
8009       coding->src_pos = from;
8010       coding->src_pos_byte = from_byte;
8011     }
8012   else if (BUFFERP (src_object))
8013     {
8014       code_conversion_save (0, 0);
8015       set_buffer_internal (XBUFFER (src_object));
8016       if (EQ (src_object, dst_object))
8017         {
8018           saved_pt = PT, saved_pt_byte = PT_BYTE;
8019           coding->src_object = del_range_1 (from, to, 1, 1);
8020           coding->src_pos = 0;
8021           coding->src_pos_byte = 0;
8022         }
8023       else
8024         {
8025           if (from < GPT && to >= GPT)
8026             move_gap_both (from, from_byte);
8027           coding->src_pos = from;
8028           coding->src_pos_byte = from_byte;
8029         }
8030     }
8031   else
8032     code_conversion_save (0, 0);
8033
8034   if (BUFFERP (dst_object))
8035     {
8036       coding->dst_object = dst_object;
8037       if (EQ (src_object, dst_object))
8038         {
8039           coding->dst_pos = from;
8040           coding->dst_pos_byte = from_byte;
8041         }
8042       else
8043         {
8044           struct buffer *current = current_buffer;
8045
8046           set_buffer_temp (XBUFFER (dst_object));
8047           coding->dst_pos = PT;
8048           coding->dst_pos_byte = PT_BYTE;
8049           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
8050           set_buffer_temp (current);
8051         }
8052       coding->dst_multibyte
8053         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
8054     }
8055   else if (EQ (dst_object, Qt))
8056     {
8057       coding->dst_object = Qnil;
8058       coding->dst_bytes = coding->src_chars;
8059       if (coding->dst_bytes == 0)
8060         coding->dst_bytes = 1;
8061       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
8062       coding->dst_multibyte = 0;
8063     }
8064   else
8065     {
8066       coding->dst_object = Qnil;
8067       coding->dst_multibyte = 0;
8068     }
8069
8070   encode_coding (coding);
8071
8072   if (EQ (dst_object, Qt))
8073     {
8074       if (BUFFERP (coding->dst_object))
8075         coding->dst_object = Fbuffer_string ();
8076       else
8077         {
8078           coding->dst_object
8079             = make_unibyte_string ((char *) coding->destination,
8080                                    coding->produced);
8081           xfree (coding->destination);
8082         }
8083     }
8084
8085   if (saved_pt >= 0)
8086     {
8087       /* This is the case of:
8088          (BUFFERP (src_object) && EQ (src_object, dst_object))
8089          As we have moved PT while replacing the original buffer
8090          contents, we must recover it now.  */
8091       set_buffer_internal (XBUFFER (src_object));
8092       if (saved_pt < from)
8093         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8094       else if (saved_pt < from + chars)
8095         TEMP_SET_PT_BOTH (from, from_byte);
8096       else if (! NILP (current_buffer->enable_multibyte_characters))
8097         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8098                           saved_pt_byte + (coding->produced - bytes));
8099       else
8100         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8101                           saved_pt_byte + (coding->produced - bytes));
8102
8103       if (need_marker_adjustment)
8104         {
8105           struct Lisp_Marker *tail;
8106
8107           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8108             if (tail->need_adjustment)
8109               {
8110                 tail->need_adjustment = 0;
8111                 if (tail->insertion_type)
8112                   {
8113                     tail->bytepos = from_byte;
8114                     tail->charpos = from;
8115                   }
8116                 else
8117                   {
8118                     tail->bytepos = from_byte + coding->produced;
8119                     tail->charpos
8120                       = (NILP (current_buffer->enable_multibyte_characters)
8121                          ? tail->bytepos : from + coding->produced_char);
8122                   }
8123               }
8124         }
8125     }
8126
8127   if (kill_src_buffer)
8128     Fkill_buffer (coding->src_object);
8129
8130   Vdeactivate_mark = old_deactivate_mark;
8131   unbind_to (count, Qnil);
8132 }
8133
8134
8135 Lisp_Object
8136 preferred_coding_system ()
8137 {
8138   int id = coding_categories[coding_priorities[0]].id;
8139
8140   return CODING_ID_NAME (id);
8141 }
8142
8143 \f
8144 #ifdef emacs
8145 /*** 8. Emacs Lisp library functions ***/
8146
8147 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8148        doc: /* Return t if OBJECT is nil or a coding-system.
8149 See the documentation of `define-coding-system' for information
8150 about coding-system objects.  */)
8151      (object)
8152      Lisp_Object object;
8153 {
8154   if (NILP (object)
8155       || CODING_SYSTEM_ID (object) >= 0)
8156     return Qt;
8157   if (! SYMBOLP (object)
8158       || NILP (Fget (object, Qcoding_system_define_form)))
8159     return Qnil;
8160   return Qt;
8161 }
8162
8163 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8164        Sread_non_nil_coding_system, 1, 1, 0,
8165        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8166      (prompt)
8167      Lisp_Object prompt;
8168 {
8169   Lisp_Object val;
8170   do
8171     {
8172       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8173                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8174     }
8175   while (SCHARS (val) == 0);
8176   return (Fintern (val, Qnil));
8177 }
8178
8179 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8180        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8181 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8182 Ignores case when completing coding systems (all Emacs coding systems
8183 are lower-case).  */)
8184      (prompt, default_coding_system)
8185      Lisp_Object prompt, default_coding_system;
8186 {
8187   Lisp_Object val;
8188   int count = SPECPDL_INDEX ();
8189
8190   if (SYMBOLP (default_coding_system))
8191     default_coding_system = SYMBOL_NAME (default_coding_system);
8192   specbind (Qcompletion_ignore_case, Qt);
8193   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8194                           Qt, Qnil, Qcoding_system_history,
8195                           default_coding_system, Qnil);
8196   unbind_to (count, Qnil);
8197   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8198 }
8199
8200 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8201        1, 1, 0,
8202        doc: /* Check validity of CODING-SYSTEM.
8203 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8204 It is valid if it is nil or a symbol defined as a coding system by the
8205 function `define-coding-system'.  */)
8206   (coding_system)
8207      Lisp_Object coding_system;
8208 {
8209   Lisp_Object define_form;
8210
8211   define_form = Fget (coding_system, Qcoding_system_define_form);
8212   if (! NILP (define_form))
8213     {
8214       Fput (coding_system, Qcoding_system_define_form, Qnil);
8215       safe_eval (define_form);
8216     }
8217   if (!NILP (Fcoding_system_p (coding_system)))
8218     return coding_system;
8219   xsignal1 (Qcoding_system_error, coding_system);
8220 }
8221
8222 \f
8223 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
8224    HIGHEST is nonzero, return the coding system of the highest
8225    priority among the detected coding systems.  Otherwize return a
8226    list of detected coding systems sorted by their priorities.  If
8227    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
8228    multibyte form but contains only ASCII and eight-bit chars.
8229    Otherwise, the bytes are raw bytes.
8230
8231    CODING-SYSTEM controls the detection as below:
8232
8233    If it is nil, detect both text-format and eol-format.  If the
8234    text-format part of CODING-SYSTEM is already specified
8235    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
8236    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
8237    detect only text-format.  */
8238
8239 Lisp_Object
8240 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
8241                       coding_system)
8242      const unsigned char *src;
8243      EMACS_INT src_chars, src_bytes;
8244      int highest;
8245      int multibytep;
8246      Lisp_Object coding_system;
8247 {
8248   const unsigned char *src_end = src + src_bytes;
8249   Lisp_Object attrs, eol_type;
8250   Lisp_Object val = Qnil;
8251   struct coding_system coding;
8252   int id;
8253   struct coding_detection_info detect_info;
8254   enum coding_category base_category;
8255   int null_byte_found = 0, eight_bit_found = 0;
8256
8257   if (NILP (coding_system))
8258     coding_system = Qundecided;
8259   setup_coding_system (coding_system, &coding);
8260   attrs = CODING_ID_ATTRS (coding.id);
8261   eol_type = CODING_ID_EOL_TYPE (coding.id);
8262   coding_system = CODING_ATTR_BASE_NAME (attrs);
8263
8264   coding.source = src;
8265   coding.src_chars = src_chars;
8266   coding.src_bytes = src_bytes;
8267   coding.src_multibyte = multibytep;
8268   coding.consumed = 0;
8269   coding.mode |= CODING_MODE_LAST_BLOCK;
8270   coding.head_ascii = 0;
8271
8272   detect_info.checked = detect_info.found = detect_info.rejected = 0;
8273
8274   /* At first, detect text-format if necessary.  */
8275   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
8276   if (base_category == coding_category_undecided)
8277     {
8278       enum coding_category category;
8279       struct coding_system *this;
8280       int c, i;
8281
8282       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
8283       for (; src < src_end; src++)
8284         {
8285           c = *src;
8286           if (c & 0x80)
8287             {
8288               eight_bit_found = 1;
8289               if (null_byte_found)
8290                 break;
8291             }
8292           else if (c < 0x20)
8293             {
8294               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
8295                   && ! inhibit_iso_escape_detection
8296                   && ! detect_info.checked)
8297                 {
8298                   if (detect_coding_iso_2022 (&coding, &detect_info))
8299                     {
8300                       /* We have scanned the whole data.  */
8301                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
8302                         {
8303                           /* We didn't find an 8-bit code.  We may
8304                              have found a null-byte, but it's very
8305                              rare that a binary file confirm to
8306                              ISO-2022.  */
8307                           src = src_end;
8308                           coding.head_ascii = src - coding.source;
8309                         }
8310                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
8311                       break;
8312                     }
8313                 }
8314               else if (! c && !inhibit_null_byte_detection)
8315                 {
8316                   null_byte_found = 1;
8317                   if (eight_bit_found)
8318                     break;
8319                 }
8320               if (! eight_bit_found)
8321                 coding.head_ascii++;
8322             }
8323           else if (! eight_bit_found)
8324             coding.head_ascii++;
8325         }
8326
8327       if (null_byte_found || eight_bit_found
8328           || coding.head_ascii < coding.src_bytes
8329           || detect_info.found)
8330         {
8331           if (coding.head_ascii == coding.src_bytes)
8332             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
8333             for (i = 0; i < coding_category_raw_text; i++)
8334               {
8335                 category = coding_priorities[i];
8336                 this = coding_categories + category;
8337                 if (detect_info.found & (1 << category))
8338                   break;
8339               }
8340           else
8341             {
8342               if (null_byte_found)
8343                 {
8344                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
8345                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
8346                 }
8347               for (i = 0; i < coding_category_raw_text; i++)
8348                 {
8349                   category = coding_priorities[i];
8350                   this = coding_categories + category;
8351
8352                   if (this->id < 0)
8353                     {
8354                       /* No coding system of this category is defined.  */
8355                       detect_info.rejected |= (1 << category);
8356                     }
8357                   else if (category >= coding_category_raw_text)
8358                     continue;
8359                   else if (detect_info.checked & (1 << category))
8360                     {
8361                       if (highest
8362                           && (detect_info.found & (1 << category)))
8363                         break;
8364                     }
8365                   else if ((*(this->detector)) (&coding, &detect_info)
8366                            && highest
8367                            && (detect_info.found & (1 << category)))
8368                     {
8369                       if (category == coding_category_utf_16_auto)
8370                         {
8371                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8372                             category = coding_category_utf_16_le;
8373                           else
8374                             category = coding_category_utf_16_be;
8375                         }
8376                       break;
8377                     }
8378                 }
8379             }
8380         }
8381
8382       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
8383           || null_byte_found)
8384         {
8385           detect_info.found = CATEGORY_MASK_RAW_TEXT;
8386           id = CODING_SYSTEM_ID (Qno_conversion);
8387           val = Fcons (make_number (id), Qnil);
8388         }
8389       else if (! detect_info.rejected && ! detect_info.found)
8390         {
8391           detect_info.found = CATEGORY_MASK_ANY;
8392           id = coding_categories[coding_category_undecided].id;
8393           val = Fcons (make_number (id), Qnil);
8394         }
8395       else if (highest)
8396         {
8397           if (detect_info.found)
8398             {
8399               detect_info.found = 1 << category;
8400               val = Fcons (make_number (this->id), Qnil);
8401             }
8402           else
8403             for (i = 0; i < coding_category_raw_text; i++)
8404               if (! (detect_info.rejected & (1 << coding_priorities[i])))
8405                 {
8406                   detect_info.found = 1 << coding_priorities[i];
8407                   id = coding_categories[coding_priorities[i]].id;
8408                   val = Fcons (make_number (id), Qnil);
8409                   break;
8410                 }
8411         }
8412       else
8413         {
8414           int mask = detect_info.rejected | detect_info.found;
8415           int found = 0;
8416
8417           for (i = coding_category_raw_text - 1; i >= 0; i--)
8418             {
8419               category = coding_priorities[i];
8420               if (! (mask & (1 << category)))
8421                 {
8422                   found |= 1 << category;
8423                   id = coding_categories[category].id;
8424                   if (id >= 0)
8425                     val = Fcons (make_number (id), val);
8426                 }
8427             }
8428           for (i = coding_category_raw_text - 1; i >= 0; i--)
8429             {
8430               category = coding_priorities[i];
8431               if (detect_info.found & (1 << category))
8432                 {
8433                   id = coding_categories[category].id;
8434                   val = Fcons (make_number (id), val);
8435                 }
8436             }
8437           detect_info.found |= found;
8438         }
8439     }
8440   else if (base_category == coding_category_utf_8_auto)
8441     {
8442       if (detect_coding_utf_8 (&coding, &detect_info))
8443         {
8444           struct coding_system *this;
8445
8446           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
8447             this = coding_categories + coding_category_utf_8_sig;
8448           else
8449             this = coding_categories + coding_category_utf_8_nosig;
8450           val = Fcons (make_number (this->id), Qnil);
8451         }
8452     }
8453   else if (base_category == coding_category_utf_16_auto)
8454     {
8455       if (detect_coding_utf_16 (&coding, &detect_info))
8456         {
8457           struct coding_system *this;
8458
8459           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
8460             this = coding_categories + coding_category_utf_16_le;
8461           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
8462             this = coding_categories + coding_category_utf_16_be;
8463           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
8464             this = coding_categories + coding_category_utf_16_be_nosig;
8465           else
8466             this = coding_categories + coding_category_utf_16_le_nosig;
8467           val = Fcons (make_number (this->id), Qnil);
8468         }
8469     }
8470   else
8471     {
8472       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
8473       val = Fcons (make_number (coding.id), Qnil);
8474     }
8475
8476   /* Then, detect eol-format if necessary.  */
8477   {
8478     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
8479     Lisp_Object tail;
8480
8481     if (VECTORP (eol_type))
8482       {
8483         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
8484           {
8485             if (null_byte_found)
8486               normal_eol = EOL_SEEN_LF;
8487             else
8488               normal_eol = detect_eol (coding.source, src_bytes,
8489                                        coding_category_raw_text);
8490           }
8491         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
8492                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
8493           utf_16_be_eol = detect_eol (coding.source, src_bytes,
8494                                       coding_category_utf_16_be);
8495         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
8496                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
8497           utf_16_le_eol = detect_eol (coding.source, src_bytes,
8498                                       coding_category_utf_16_le);
8499       }
8500     else
8501       {
8502         if (EQ (eol_type, Qunix))
8503           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
8504         else if (EQ (eol_type, Qdos))
8505           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
8506         else
8507           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
8508       }
8509
8510     for (tail = val; CONSP (tail); tail = XCDR (tail))
8511       {
8512         enum coding_category category;
8513         int this_eol;
8514
8515         id = XINT (XCAR (tail));
8516         attrs = CODING_ID_ATTRS (id);
8517         category = XINT (CODING_ATTR_CATEGORY (attrs));
8518         eol_type = CODING_ID_EOL_TYPE (id);
8519         if (VECTORP (eol_type))
8520           {
8521             if (category == coding_category_utf_16_be
8522                 || category == coding_category_utf_16_be_nosig)
8523               this_eol = utf_16_be_eol;
8524             else if (category == coding_category_utf_16_le
8525                      || category == coding_category_utf_16_le_nosig)
8526               this_eol = utf_16_le_eol;
8527             else
8528               this_eol = normal_eol;
8529
8530             if (this_eol == EOL_SEEN_LF)
8531               XSETCAR (tail, AREF (eol_type, 0));
8532             else if (this_eol == EOL_SEEN_CRLF)
8533               XSETCAR (tail, AREF (eol_type, 1));
8534             else if (this_eol == EOL_SEEN_CR)
8535               XSETCAR (tail, AREF (eol_type, 2));
8536             else
8537               XSETCAR (tail, CODING_ID_NAME (id));
8538           }
8539         else
8540           XSETCAR (tail, CODING_ID_NAME (id));
8541       }
8542   }
8543
8544   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8545 }
8546
8547
8548 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8549        2, 3, 0,
8550        doc: /* Detect coding system of the text in the region between START and END.
8551 Return a list of possible coding systems ordered by priority.
8552 The coding systems to try and their priorities follows what
8553 the function `coding-system-priority-list' (which see) returns.
8554
8555 If only ASCII characters are found (except for such ISO-2022 control
8556 characters as ESC), it returns a list of single element `undecided'
8557 or its subsidiary coding system according to a detected end-of-line
8558 format.
8559
8560 If optional argument HIGHEST is non-nil, return the coding system of
8561 highest priority.  */)
8562      (start, end, highest)
8563      Lisp_Object start, end, highest;
8564 {
8565   int from, to;
8566   int from_byte, to_byte;
8567
8568   CHECK_NUMBER_COERCE_MARKER (start);
8569   CHECK_NUMBER_COERCE_MARKER (end);
8570
8571   validate_region (&start, &end);
8572   from = XINT (start), to = XINT (end);
8573   from_byte = CHAR_TO_BYTE (from);
8574   to_byte = CHAR_TO_BYTE (to);
8575
8576   if (from < GPT && to >= GPT)
8577     move_gap_both (to, to_byte);
8578
8579   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8580                                to - from, to_byte - from_byte,
8581                                !NILP (highest),
8582                                !NILP (current_buffer
8583                                       ->enable_multibyte_characters),
8584                                Qnil);
8585 }
8586
8587 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8588        1, 2, 0,
8589        doc: /* Detect coding system of the text in STRING.
8590 Return a list of possible coding systems ordered by priority.
8591 The coding systems to try and their priorities follows what
8592 the function `coding-system-priority-list' (which see) returns.
8593
8594 If only ASCII characters are found (except for such ISO-2022 control
8595 characters as ESC), it returns a list of single element `undecided'
8596 or its subsidiary coding system according to a detected end-of-line
8597 format.
8598
8599 If optional argument HIGHEST is non-nil, return the coding system of
8600 highest priority.  */)
8601      (string, highest)
8602      Lisp_Object string, highest;
8603 {
8604   CHECK_STRING (string);
8605
8606   return detect_coding_system (SDATA (string),
8607                                SCHARS (string), SBYTES (string),
8608                                !NILP (highest), STRING_MULTIBYTE (string),
8609                                Qnil);
8610 }
8611
8612
8613 static INLINE int
8614 char_encodable_p (c, attrs)
8615      int c;
8616      Lisp_Object attrs;
8617 {
8618   Lisp_Object tail;
8619   struct charset *charset;
8620   Lisp_Object translation_table;
8621
8622   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8623   if (! NILP (translation_table))
8624     c = translate_char (translation_table, c);
8625   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8626        CONSP (tail); tail = XCDR (tail))
8627     {
8628       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8629       if (CHAR_CHARSET_P (c, charset))
8630         break;
8631     }
8632   return (! NILP (tail));
8633 }
8634
8635
8636 /* Return a list of coding systems that safely encode the text between
8637    START and END.  If EXCLUDE is non-nil, it is a list of coding
8638    systems not to check.  The returned list doesn't contain any such
8639    coding systems.  In any case, if the text contains only ASCII or is
8640    unibyte, return t.  */
8641
8642 DEFUN ("find-coding-systems-region-internal",
8643        Ffind_coding_systems_region_internal,
8644        Sfind_coding_systems_region_internal, 2, 3, 0,
8645        doc: /* Internal use only.  */)
8646      (start, end, exclude)
8647      Lisp_Object start, end, exclude;
8648 {
8649   Lisp_Object coding_attrs_list, safe_codings;
8650   EMACS_INT start_byte, end_byte;
8651   const unsigned char *p, *pbeg, *pend;
8652   int c;
8653   Lisp_Object tail, elt, work_table;
8654
8655   if (STRINGP (start))
8656     {
8657       if (!STRING_MULTIBYTE (start)
8658           || SCHARS (start) == SBYTES (start))
8659         return Qt;
8660       start_byte = 0;
8661       end_byte = SBYTES (start);
8662     }
8663   else
8664     {
8665       CHECK_NUMBER_COERCE_MARKER (start);
8666       CHECK_NUMBER_COERCE_MARKER (end);
8667       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8668         args_out_of_range (start, end);
8669       if (NILP (current_buffer->enable_multibyte_characters))
8670         return Qt;
8671       start_byte = CHAR_TO_BYTE (XINT (start));
8672       end_byte = CHAR_TO_BYTE (XINT (end));
8673       if (XINT (end) - XINT (start) == end_byte - start_byte)
8674         return Qt;
8675
8676       if (XINT (start) < GPT && XINT (end) > GPT)
8677         {
8678           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8679             move_gap_both (XINT (start), start_byte);
8680           else
8681             move_gap_both (XINT (end), end_byte);
8682         }
8683     }
8684
8685   coding_attrs_list = Qnil;
8686   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8687     if (NILP (exclude)
8688         || NILP (Fmemq (XCAR (tail), exclude)))
8689       {
8690         Lisp_Object attrs;
8691
8692         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8693         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8694             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8695           {
8696             ASET (attrs, coding_attr_trans_tbl,
8697                   get_translation_table (attrs, 1, NULL));
8698             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8699           }
8700       }
8701
8702   if (STRINGP (start))
8703     p = pbeg = SDATA (start);
8704   else
8705     p = pbeg = BYTE_POS_ADDR (start_byte);
8706   pend = p + (end_byte - start_byte);
8707
8708   while (p < pend && ASCII_BYTE_P (*p)) p++;
8709   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8710
8711   work_table = Fmake_char_table (Qnil, Qnil);
8712   while (p < pend)
8713     {
8714       if (ASCII_BYTE_P (*p))
8715         p++;
8716       else
8717         {
8718           c = STRING_CHAR_ADVANCE (p);
8719           if (!NILP (char_table_ref (work_table, c)))
8720             /* This character was already checked.  Ignore it.  */
8721             continue;
8722
8723           charset_map_loaded = 0;
8724           for (tail = coding_attrs_list; CONSP (tail);)
8725             {
8726               elt = XCAR (tail);
8727               if (NILP (elt))
8728                 tail = XCDR (tail);
8729               else if (char_encodable_p (c, elt))
8730                 tail = XCDR (tail);
8731               else if (CONSP (XCDR (tail)))
8732                 {
8733                   XSETCAR (tail, XCAR (XCDR (tail)));
8734                   XSETCDR (tail, XCDR (XCDR (tail)));
8735                 }
8736               else
8737                 {
8738                   XSETCAR (tail, Qnil);
8739                   tail = XCDR (tail);
8740                 }
8741             }
8742           if (charset_map_loaded)
8743             {
8744               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8745
8746               if (STRINGP (start))
8747                 pbeg = SDATA (start);
8748               else
8749                 pbeg = BYTE_POS_ADDR (start_byte);
8750               p = pbeg + p_offset;
8751               pend = pbeg + pend_offset;
8752             }
8753           char_table_set (work_table, c, Qt);
8754         }
8755     }
8756
8757   safe_codings = list2 (Qraw_text, Qno_conversion);
8758   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8759     if (! NILP (XCAR (tail)))
8760       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8761
8762   return safe_codings;
8763 }
8764
8765
8766 DEFUN ("unencodable-char-position", Funencodable_char_position,
8767        Sunencodable_char_position, 3, 5, 0,
8768        doc: /*
8769 Return position of first un-encodable character in a region.
8770 START and END specify the region and CODING-SYSTEM specifies the
8771 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8772
8773 If optional 4th argument COUNT is non-nil, it specifies at most how
8774 many un-encodable characters to search.  In this case, the value is a
8775 list of positions.
8776
8777 If optional 5th argument STRING is non-nil, it is a string to search
8778 for un-encodable characters.  In that case, START and END are indexes
8779 to the string.  */)
8780      (start, end, coding_system, count, string)
8781      Lisp_Object start, end, coding_system, count, string;
8782 {
8783   int n;
8784   struct coding_system coding;
8785   Lisp_Object attrs, charset_list, translation_table;
8786   Lisp_Object positions;
8787   int from, to;
8788   const unsigned char *p, *stop, *pend;
8789   int ascii_compatible;
8790
8791   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8792   attrs = CODING_ID_ATTRS (coding.id);
8793   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8794     return Qnil;
8795   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8796   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8797   translation_table = get_translation_table (attrs, 1, NULL);
8798
8799   if (NILP (string))
8800     {
8801       validate_region (&start, &end);
8802       from = XINT (start);
8803       to = XINT (end);
8804       if (NILP (current_buffer->enable_multibyte_characters)
8805           || (ascii_compatible
8806               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8807         return Qnil;
8808       p = CHAR_POS_ADDR (from);
8809       pend = CHAR_POS_ADDR (to);
8810       if (from < GPT && to >= GPT)
8811         stop = GPT_ADDR;
8812       else
8813         stop = pend;
8814     }
8815   else
8816     {
8817       CHECK_STRING (string);
8818       CHECK_NATNUM (start);
8819       CHECK_NATNUM (end);
8820       from = XINT (start);
8821       to = XINT (end);
8822       if (from > to
8823           || to > SCHARS (string))
8824         args_out_of_range_3 (string, start, end);
8825       if (! STRING_MULTIBYTE (string))
8826         return Qnil;
8827       p = SDATA (string) + string_char_to_byte (string, from);
8828       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8829       if (ascii_compatible && (to - from) == (pend - p))
8830         return Qnil;
8831     }
8832
8833   if (NILP (count))
8834     n = 1;
8835   else
8836     {
8837       CHECK_NATNUM (count);
8838       n = XINT (count);
8839     }
8840
8841   positions = Qnil;
8842   while (1)
8843     {
8844       int c;
8845
8846       if (ascii_compatible)
8847         while (p < stop && ASCII_BYTE_P (*p))
8848           p++, from++;
8849       if (p >= stop)
8850         {
8851           if (p >= pend)
8852             break;
8853           stop = pend;
8854           p = GAP_END_ADDR;
8855         }
8856
8857       c = STRING_CHAR_ADVANCE (p);
8858       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8859           && ! char_charset (translate_char (translation_table, c),
8860                              charset_list, NULL))
8861         {
8862           positions = Fcons (make_number (from), positions);
8863           n--;
8864           if (n == 0)
8865             break;
8866         }
8867
8868       from++;
8869     }
8870
8871   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8872 }
8873
8874
8875 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8876        Scheck_coding_systems_region, 3, 3, 0,
8877        doc: /* Check if the region is encodable by coding systems.
8878
8879 START and END are buffer positions specifying the region.
8880 CODING-SYSTEM-LIST is a list of coding systems to check.
8881
8882 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8883 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8884 whole region, POS0, POS1, ... are buffer positions where non-encodable
8885 characters are found.
8886
8887 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8888 value is nil.
8889
8890 START may be a string.  In that case, check if the string is
8891 encodable, and the value contains indices to the string instead of
8892 buffer positions.  END is ignored.
8893
8894 If the current buffer (or START if it is a string) is unibyte, the value
8895 is nil.  */)
8896      (start, end, coding_system_list)
8897      Lisp_Object start, end, coding_system_list;
8898 {
8899   Lisp_Object list;
8900   EMACS_INT start_byte, end_byte;
8901   int pos;
8902   const unsigned char *p, *pbeg, *pend;
8903   int c;
8904   Lisp_Object tail, elt, attrs;
8905
8906   if (STRINGP (start))
8907     {
8908       if (!STRING_MULTIBYTE (start)
8909           || SCHARS (start) == SBYTES (start))
8910         return Qnil;
8911       start_byte = 0;
8912       end_byte = SBYTES (start);
8913       pos = 0;
8914     }
8915   else
8916     {
8917       CHECK_NUMBER_COERCE_MARKER (start);
8918       CHECK_NUMBER_COERCE_MARKER (end);
8919       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8920         args_out_of_range (start, end);
8921       if (NILP (current_buffer->enable_multibyte_characters))
8922         return Qnil;
8923       start_byte = CHAR_TO_BYTE (XINT (start));
8924       end_byte = CHAR_TO_BYTE (XINT (end));
8925       if (XINT (end) - XINT (start) == end_byte - start_byte)
8926         return Qnil;
8927
8928       if (XINT (start) < GPT && XINT (end) > GPT)
8929         {
8930           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8931             move_gap_both (XINT (start), start_byte);
8932           else
8933             move_gap_both (XINT (end), end_byte);
8934         }
8935       pos = XINT (start);
8936     }
8937
8938   list = Qnil;
8939   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8940     {
8941       elt = XCAR (tail);
8942       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8943       ASET (attrs, coding_attr_trans_tbl,
8944             get_translation_table (attrs, 1, NULL));
8945       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8946     }
8947
8948   if (STRINGP (start))
8949     p = pbeg = SDATA (start);
8950   else
8951     p = pbeg = BYTE_POS_ADDR (start_byte);
8952   pend = p + (end_byte - start_byte);
8953
8954   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8955   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8956
8957   while (p < pend)
8958     {
8959       if (ASCII_BYTE_P (*p))
8960         p++;
8961       else
8962         {
8963           c = STRING_CHAR_ADVANCE (p);
8964
8965           charset_map_loaded = 0;
8966           for (tail = list; CONSP (tail); tail = XCDR (tail))
8967             {
8968               elt = XCDR (XCAR (tail));
8969               if (! char_encodable_p (c, XCAR (elt)))
8970                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8971             }
8972           if (charset_map_loaded)
8973             {
8974               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8975
8976               if (STRINGP (start))
8977                 pbeg = SDATA (start);
8978               else
8979                 pbeg = BYTE_POS_ADDR (start_byte);
8980               p = pbeg + p_offset;
8981               pend = pbeg + pend_offset;
8982             }
8983         }
8984       pos++;
8985     }
8986
8987   tail = list;
8988   list = Qnil;
8989   for (; CONSP (tail); tail = XCDR (tail))
8990     {
8991       elt = XCAR (tail);
8992       if (CONSP (XCDR (XCDR (elt))))
8993         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8994                       list);
8995     }
8996
8997   return list;
8998 }
8999
9000
9001 Lisp_Object
9002 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
9003      Lisp_Object start, end, coding_system, dst_object;
9004      int encodep, norecord;
9005 {
9006   struct coding_system coding;
9007   EMACS_INT from, from_byte, to, to_byte;
9008   Lisp_Object src_object;
9009
9010   CHECK_NUMBER_COERCE_MARKER (start);
9011   CHECK_NUMBER_COERCE_MARKER (end);
9012   if (NILP (coding_system))
9013     coding_system = Qno_conversion;
9014   else
9015     CHECK_CODING_SYSTEM (coding_system);
9016   src_object = Fcurrent_buffer ();
9017   if (NILP (dst_object))
9018     dst_object = src_object;
9019   else if (! EQ (dst_object, Qt))
9020     CHECK_BUFFER (dst_object);
9021
9022   validate_region (&start, &end);
9023   from = XFASTINT (start);
9024   from_byte = CHAR_TO_BYTE (from);
9025   to = XFASTINT (end);
9026   to_byte = CHAR_TO_BYTE (to);
9027
9028   setup_coding_system (coding_system, &coding);
9029   coding.mode |= CODING_MODE_LAST_BLOCK;
9030
9031   if (encodep)
9032     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9033                           dst_object);
9034   else
9035     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
9036                           dst_object);
9037   if (! norecord)
9038     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9039
9040   return (BUFFERP (dst_object)
9041           ? make_number (coding.produced_char)
9042           : coding.dst_object);
9043 }
9044
9045
9046 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
9047        3, 4, "r\nzCoding system: ",
9048        doc: /* Decode the current region from the specified coding system.
9049 When called from a program, takes four arguments:
9050         START, END, CODING-SYSTEM, and DESTINATION.
9051 START and END are buffer positions.
9052
9053 Optional 4th arguments DESTINATION specifies where the decoded text goes.
9054 If nil, the region between START and END is replaced by the decoded text.
9055 If buffer, the decoded text is inserted in that buffer after point (point
9056 does not move).
9057 In those cases, the length of the decoded text is returned.
9058 If DESTINATION is t, the decoded text is returned.
9059
9060 This function sets `last-coding-system-used' to the precise coding system
9061 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9062 not fully specified.)  */)
9063      (start, end, coding_system, destination)
9064      Lisp_Object start, end, coding_system, destination;
9065 {
9066   return code_convert_region (start, end, coding_system, destination, 0, 0);
9067 }
9068
9069 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
9070        3, 4, "r\nzCoding system: ",
9071        doc: /* Encode the current region by specified coding system.
9072 When called from a program, takes four arguments:
9073         START, END, CODING-SYSTEM and DESTINATION.
9074 START and END are buffer positions.
9075
9076 Optional 4th arguments DESTINATION specifies where the encoded text goes.
9077 If nil, the region between START and END is replace by the encoded text.
9078 If buffer, the encoded text is inserted in that buffer after point (point
9079 does not move).
9080 In those cases, the length of the encoded text is returned.
9081 If DESTINATION is t, the encoded text is returned.
9082
9083 This function sets `last-coding-system-used' to the precise coding system
9084 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9085 not fully specified.)  */)
9086   (start, end, coding_system, destination)
9087      Lisp_Object start, end, coding_system, destination;
9088 {
9089   return code_convert_region (start, end, coding_system, destination, 1, 0);
9090 }
9091
9092 Lisp_Object
9093 code_convert_string (string, coding_system, dst_object,
9094                      encodep, nocopy, norecord)
9095      Lisp_Object string, coding_system, dst_object;
9096      int encodep, nocopy, norecord;
9097 {
9098   struct coding_system coding;
9099   EMACS_INT chars, bytes;
9100
9101   CHECK_STRING (string);
9102   if (NILP (coding_system))
9103     {
9104       if (! norecord)
9105         Vlast_coding_system_used = Qno_conversion;
9106       if (NILP (dst_object))
9107         return (nocopy ? Fcopy_sequence (string) : string);
9108     }
9109
9110   if (NILP (coding_system))
9111     coding_system = Qno_conversion;
9112   else
9113     CHECK_CODING_SYSTEM (coding_system);
9114   if (NILP (dst_object))
9115     dst_object = Qt;
9116   else if (! EQ (dst_object, Qt))
9117     CHECK_BUFFER (dst_object);
9118
9119   setup_coding_system (coding_system, &coding);
9120   coding.mode |= CODING_MODE_LAST_BLOCK;
9121   chars = SCHARS (string);
9122   bytes = SBYTES (string);
9123   if (encodep)
9124     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9125   else
9126     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9127   if (! norecord)
9128     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9129
9130   return (BUFFERP (dst_object)
9131           ? make_number (coding.produced_char)
9132           : coding.dst_object);
9133 }
9134
9135
9136 /* Encode or decode STRING according to CODING_SYSTEM.
9137    Do not set Vlast_coding_system_used.
9138
9139    This function is called only from macros DECODE_FILE and
9140    ENCODE_FILE, thus we ignore character composition.  */
9141
9142 Lisp_Object
9143 code_convert_string_norecord (string, coding_system, encodep)
9144      Lisp_Object string, coding_system;
9145      int encodep;
9146 {
9147   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9148 }
9149
9150
9151 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9152        2, 4, 0,
9153        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9154
9155 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9156 if the decoding operation is trivial.
9157
9158 Optional fourth arg BUFFER non-nil means that the decoded text is
9159 inserted in that buffer after point (point does not move).  In this
9160 case, the return value is the length of the decoded text.
9161
9162 This function sets `last-coding-system-used' to the precise coding system
9163 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9164 not fully specified.)  */)
9165   (string, coding_system, nocopy, buffer)
9166      Lisp_Object string, coding_system, nocopy, buffer;
9167 {
9168   return code_convert_string (string, coding_system, buffer,
9169                               0, ! NILP (nocopy), 0);
9170 }
9171
9172 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9173        2, 4, 0,
9174        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9175
9176 Optional third arg NOCOPY non-nil means it is OK to return STRING
9177 itself if the encoding operation is trivial.
9178
9179 Optional fourth arg BUFFER non-nil means that the encoded text is
9180 inserted in that buffer after point (point does not move).  In this
9181 case, the return value is the length of the encoded text.
9182
9183 This function sets `last-coding-system-used' to the precise coding system
9184 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9185 not fully specified.)  */)
9186      (string, coding_system, nocopy, buffer)
9187      Lisp_Object string, coding_system, nocopy, buffer;
9188 {
9189   return code_convert_string (string, coding_system, buffer,
9190                               1, ! NILP (nocopy), 1);
9191 }
9192
9193 \f
9194 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9195        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9196 Return the corresponding character.  */)
9197      (code)
9198      Lisp_Object code;
9199 {
9200   Lisp_Object spec, attrs, val;
9201   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9202   int c;
9203
9204   CHECK_NATNUM (code);
9205   c = XFASTINT (code);
9206   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9207   attrs = AREF (spec, 0);
9208
9209   if (ASCII_BYTE_P (c)
9210       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9211     return code;
9212
9213   val = CODING_ATTR_CHARSET_LIST (attrs);
9214   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9215   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9216   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9217
9218   if (c <= 0x7F)
9219     charset = charset_roman;
9220   else if (c >= 0xA0 && c < 0xDF)
9221     {
9222       charset = charset_kana;
9223       c -= 0x80;
9224     }
9225   else
9226     {
9227       int s1 = c >> 8, s2 = c & 0xFF;
9228
9229       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9230           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9231         error ("Invalid code: %d", code);
9232       SJIS_TO_JIS (c);
9233       charset = charset_kanji;
9234     }
9235   c = DECODE_CHAR (charset, c);
9236   if (c < 0)
9237     error ("Invalid code: %d", code);
9238   return make_number (c);
9239 }
9240
9241
9242 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9243        doc: /* Encode a Japanese character CH to shift_jis encoding.
9244 Return the corresponding code in SJIS.  */)
9245      (ch)
9246     Lisp_Object ch;
9247 {
9248   Lisp_Object spec, attrs, charset_list;
9249   int c;
9250   struct charset *charset;
9251   unsigned code;
9252
9253   CHECK_CHARACTER (ch);
9254   c = XFASTINT (ch);
9255   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9256   attrs = AREF (spec, 0);
9257
9258   if (ASCII_CHAR_P (c)
9259       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9260     return ch;
9261
9262   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9263   charset = char_charset (c, charset_list, &code);
9264   if (code == CHARSET_INVALID_CODE (charset))
9265     error ("Can't encode by shift_jis encoding: %d", c);
9266   JIS_TO_SJIS (code);
9267
9268   return make_number (code);
9269 }
9270
9271 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9272        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9273 Return the corresponding character.  */)
9274      (code)
9275      Lisp_Object code;
9276 {
9277   Lisp_Object spec, attrs, val;
9278   struct charset *charset_roman, *charset_big5, *charset;
9279   int c;
9280
9281   CHECK_NATNUM (code);
9282   c = XFASTINT (code);
9283   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9284   attrs = AREF (spec, 0);
9285
9286   if (ASCII_BYTE_P (c)
9287       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9288     return code;
9289
9290   val = CODING_ATTR_CHARSET_LIST (attrs);
9291   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9292   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9293
9294   if (c <= 0x7F)
9295     charset = charset_roman;
9296   else
9297     {
9298       int b1 = c >> 8, b2 = c & 0x7F;
9299       if (b1 < 0xA1 || b1 > 0xFE
9300           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9301         error ("Invalid code: %d", code);
9302       charset = charset_big5;
9303     }
9304   c = DECODE_CHAR (charset, (unsigned )c);
9305   if (c < 0)
9306     error ("Invalid code: %d", code);
9307   return make_number (c);
9308 }
9309
9310 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9311        doc: /* Encode the Big5 character CH to BIG5 coding system.
9312 Return the corresponding character code in Big5.  */)
9313      (ch)
9314      Lisp_Object ch;
9315 {
9316   Lisp_Object spec, attrs, charset_list;
9317   struct charset *charset;
9318   int c;
9319   unsigned code;
9320
9321   CHECK_CHARACTER (ch);
9322   c = XFASTINT (ch);
9323   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9324   attrs = AREF (spec, 0);
9325   if (ASCII_CHAR_P (c)
9326       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9327     return ch;
9328
9329   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9330   charset = char_charset (c, charset_list, &code);
9331   if (code == CHARSET_INVALID_CODE (charset))
9332     error ("Can't encode by Big5 encoding: %d", c);
9333
9334   return make_number (code);
9335 }
9336
9337 \f
9338 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9339        Sset_terminal_coding_system_internal, 1, 2, 0,
9340        doc: /* Internal use only.  */)
9341      (coding_system, terminal)
9342      Lisp_Object coding_system;
9343      Lisp_Object terminal;
9344 {
9345   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9346   CHECK_SYMBOL (coding_system);
9347   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9348   /* We had better not send unsafe characters to terminal.  */
9349   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9350   /* Characer composition should be disabled.  */
9351   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9352   terminal_coding->src_multibyte = 1;
9353   terminal_coding->dst_multibyte = 0;
9354   return Qnil;
9355 }
9356
9357 DEFUN ("set-safe-terminal-coding-system-internal",
9358        Fset_safe_terminal_coding_system_internal,
9359        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9360        doc: /* Internal use only.  */)
9361      (coding_system)
9362      Lisp_Object coding_system;
9363 {
9364   CHECK_SYMBOL (coding_system);
9365   setup_coding_system (Fcheck_coding_system (coding_system),
9366                        &safe_terminal_coding);
9367   /* Characer composition should be disabled.  */
9368   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9369   safe_terminal_coding.src_multibyte = 1;
9370   safe_terminal_coding.dst_multibyte = 0;
9371   return Qnil;
9372 }
9373
9374 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9375        Sterminal_coding_system, 0, 1, 0,
9376        doc: /* Return coding system specified for terminal output on the given terminal.
9377 TERMINAL may be a terminal object, a frame, or nil for the selected
9378 frame's terminal device.  */)
9379      (terminal)
9380      Lisp_Object terminal;
9381 {
9382   struct coding_system *terminal_coding
9383     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9384   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9385
9386   /* For backward compatibility, return nil if it is `undecided'. */
9387   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9388 }
9389
9390 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9391        Sset_keyboard_coding_system_internal, 1, 2, 0,
9392        doc: /* Internal use only.  */)
9393      (coding_system, terminal)
9394      Lisp_Object coding_system;
9395      Lisp_Object terminal;
9396 {
9397   struct terminal *t = get_terminal (terminal, 1);
9398   CHECK_SYMBOL (coding_system);
9399   if (NILP (coding_system))
9400     coding_system = Qno_conversion;
9401   else
9402     Fcheck_coding_system (coding_system);
9403   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9404   /* Characer composition should be disabled.  */
9405   TERMINAL_KEYBOARD_CODING (t)->common_flags
9406     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9407   return Qnil;
9408 }
9409
9410 DEFUN ("keyboard-coding-system",
9411        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9412        doc: /* Return coding system specified for decoding keyboard input.  */)
9413      (terminal)
9414      Lisp_Object terminal;
9415 {
9416   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9417                          (get_terminal (terminal, 1))->id);
9418 }
9419
9420 \f
9421 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9422        Sfind_operation_coding_system,  1, MANY, 0,
9423        doc: /* Choose a coding system for an operation based on the target name.
9424 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9425 DECODING-SYSTEM is the coding system to use for decoding
9426 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9427 for encoding (in case OPERATION does encoding).
9428
9429 The first argument OPERATION specifies an I/O primitive:
9430   For file I/O, `insert-file-contents' or `write-region'.
9431   For process I/O, `call-process', `call-process-region', or `start-process'.
9432   For network I/O, `open-network-stream'.
9433
9434 The remaining arguments should be the same arguments that were passed
9435 to the primitive.  Depending on which primitive, one of those arguments
9436 is selected as the TARGET.  For example, if OPERATION does file I/O,
9437 whichever argument specifies the file name is TARGET.
9438
9439 TARGET has a meaning which depends on OPERATION:
9440   For file I/O, TARGET is a file name (except for the special case below).
9441   For process I/O, TARGET is a process name.
9442   For network I/O, TARGET is a service name or a port number.
9443
9444 This function looks up what is specified for TARGET in
9445 `file-coding-system-alist', `process-coding-system-alist',
9446 or `network-coding-system-alist' depending on OPERATION.
9447 They may specify a coding system, a cons of coding systems,
9448 or a function symbol to call.
9449 In the last case, we call the function with one argument,
9450 which is a list of all the arguments given to this function.
9451 If the function can't decide a coding system, it can return
9452 `undecided' so that the normal code-detection is performed.
9453
9454 If OPERATION is `insert-file-contents', the argument corresponding to
9455 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9456 file name to look up, and BUFFER is a buffer that contains the file's
9457 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9458 function to call for FILENAME, that function should examine the
9459 contents of BUFFER instead of reading the file.
9460
9461 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9462      (nargs, args)
9463      int nargs;
9464      Lisp_Object *args;
9465 {
9466   Lisp_Object operation, target_idx, target, val;
9467   register Lisp_Object chain;
9468
9469   if (nargs < 2)
9470     error ("Too few arguments");
9471   operation = args[0];
9472   if (!SYMBOLP (operation)
9473       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9474     error ("Invalid first argument");
9475   if (nargs < 1 + XINT (target_idx))
9476     error ("Too few arguments for operation: %s",
9477            SDATA (SYMBOL_NAME (operation)));
9478   target = args[XINT (target_idx) + 1];
9479   if (!(STRINGP (target)
9480         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9481             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9482         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9483     error ("Invalid %dth argument", XINT (target_idx) + 1);
9484   if (CONSP (target))
9485     target = XCAR (target);
9486
9487   chain = ((EQ (operation, Qinsert_file_contents)
9488             || EQ (operation, Qwrite_region))
9489            ? Vfile_coding_system_alist
9490            : (EQ (operation, Qopen_network_stream)
9491               ? Vnetwork_coding_system_alist
9492               : Vprocess_coding_system_alist));
9493   if (NILP (chain))
9494     return Qnil;
9495
9496   for (; CONSP (chain); chain = XCDR (chain))
9497     {
9498       Lisp_Object elt;
9499
9500       elt = XCAR (chain);
9501       if (CONSP (elt)
9502           && ((STRINGP (target)
9503                && STRINGP (XCAR (elt))
9504                && fast_string_match (XCAR (elt), target) >= 0)
9505               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9506         {
9507           val = XCDR (elt);
9508           /* Here, if VAL is both a valid coding system and a valid
9509              function symbol, we return VAL as a coding system.  */
9510           if (CONSP (val))
9511             return val;
9512           if (! SYMBOLP (val))
9513             return Qnil;
9514           if (! NILP (Fcoding_system_p (val)))
9515             return Fcons (val, val);
9516           if (! NILP (Ffboundp (val)))
9517             {
9518               /* We use call1 rather than safe_call1
9519                  so as to get bug reports about functions called here
9520                  which don't handle the current interface.  */
9521               val = call1 (val, Flist (nargs, args));
9522               if (CONSP (val))
9523                 return val;
9524               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9525                 return Fcons (val, val);
9526             }
9527           return Qnil;
9528         }
9529     }
9530   return Qnil;
9531 }
9532
9533 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9534        Sset_coding_system_priority, 0, MANY, 0,
9535        doc: /* Assign higher priority to the coding systems given as arguments.
9536 If multiple coding systems belong to the same category,
9537 all but the first one are ignored.
9538
9539 usage: (set-coding-system-priority &rest coding-systems)  */)
9540      (nargs, args)
9541      int nargs;
9542      Lisp_Object *args;
9543 {
9544   int i, j;
9545   int changed[coding_category_max];
9546   enum coding_category priorities[coding_category_max];
9547
9548   bzero (changed, sizeof changed);
9549
9550   for (i = j = 0; i < nargs; i++)
9551     {
9552       enum coding_category category;
9553       Lisp_Object spec, attrs;
9554
9555       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9556       attrs = AREF (spec, 0);
9557       category = XINT (CODING_ATTR_CATEGORY (attrs));
9558       if (changed[category])
9559         /* Ignore this coding system because a coding system of the
9560            same category already had a higher priority.  */
9561         continue;
9562       changed[category] = 1;
9563       priorities[j++] = category;
9564       if (coding_categories[category].id >= 0
9565           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9566         setup_coding_system (args[i], &coding_categories[category]);
9567       Fset (AREF (Vcoding_category_table, category), args[i]);
9568     }
9569
9570   /* Now we have decided top J priorities.  Reflect the order of the
9571      original priorities to the remaining priorities.  */
9572
9573   for (i = j, j = 0; i < coding_category_max; i++, j++)
9574     {
9575       while (j < coding_category_max
9576              && changed[coding_priorities[j]])
9577         j++;
9578       if (j == coding_category_max)
9579         abort ();
9580       priorities[i] = coding_priorities[j];
9581     }
9582
9583   bcopy (priorities, coding_priorities, sizeof priorities);
9584
9585   /* Update `coding-category-list'.  */
9586   Vcoding_category_list = Qnil;
9587   for (i = coding_category_max - 1; i >= 0; i--)
9588     Vcoding_category_list
9589       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9590                Vcoding_category_list);
9591
9592   return Qnil;
9593 }
9594
9595 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9596        Scoding_system_priority_list, 0, 1, 0,
9597        doc: /* Return a list of coding systems ordered by their priorities.
9598 The list contains a subset of coding systems; i.e. coding systems
9599 assigned to each coding category (see `coding-category-list').
9600
9601 HIGHESTP non-nil means just return the highest priority one.  */)
9602      (highestp)
9603      Lisp_Object highestp;
9604 {
9605   int i;
9606   Lisp_Object val;
9607
9608   for (i = 0, val = Qnil; i < coding_category_max; i++)
9609     {
9610       enum coding_category category = coding_priorities[i];
9611       int id = coding_categories[category].id;
9612       Lisp_Object attrs;
9613
9614       if (id < 0)
9615         continue;
9616       attrs = CODING_ID_ATTRS (id);
9617       if (! NILP (highestp))
9618         return CODING_ATTR_BASE_NAME (attrs);
9619       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9620     }
9621   return Fnreverse (val);
9622 }
9623
9624 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9625
9626 static Lisp_Object
9627 make_subsidiaries (base)
9628      Lisp_Object base;
9629 {
9630   Lisp_Object subsidiaries;
9631   int base_name_len = SBYTES (SYMBOL_NAME (base));
9632   char *buf = (char *) alloca (base_name_len + 6);
9633   int i;
9634
9635   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9636   subsidiaries = Fmake_vector (make_number (3), Qnil);
9637   for (i = 0; i < 3; i++)
9638     {
9639       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9640       ASET (subsidiaries, i, intern (buf));
9641     }
9642   return subsidiaries;
9643 }
9644
9645
9646 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9647        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9648        doc: /* For internal use only.
9649 usage: (define-coding-system-internal ...)  */)
9650      (nargs, args)
9651      int nargs;
9652      Lisp_Object *args;
9653 {
9654   Lisp_Object name;
9655   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9656   Lisp_Object attrs;            /* Vector of attributes.  */
9657   Lisp_Object eol_type;
9658   Lisp_Object aliases;
9659   Lisp_Object coding_type, charset_list, safe_charsets;
9660   enum coding_category category;
9661   Lisp_Object tail, val;
9662   int max_charset_id = 0;
9663   int i;
9664
9665   if (nargs < coding_arg_max)
9666     goto short_args;
9667
9668   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9669
9670   name = args[coding_arg_name];
9671   CHECK_SYMBOL (name);
9672   CODING_ATTR_BASE_NAME (attrs) = name;
9673
9674   val = args[coding_arg_mnemonic];
9675   if (! STRINGP (val))
9676     CHECK_CHARACTER (val);
9677   CODING_ATTR_MNEMONIC (attrs) = val;
9678
9679   coding_type = args[coding_arg_coding_type];
9680   CHECK_SYMBOL (coding_type);
9681   CODING_ATTR_TYPE (attrs) = coding_type;
9682
9683   charset_list = args[coding_arg_charset_list];
9684   if (SYMBOLP (charset_list))
9685     {
9686       if (EQ (charset_list, Qiso_2022))
9687         {
9688           if (! EQ (coding_type, Qiso_2022))
9689             error ("Invalid charset-list");
9690           charset_list = Viso_2022_charset_list;
9691         }
9692       else if (EQ (charset_list, Qemacs_mule))
9693         {
9694           if (! EQ (coding_type, Qemacs_mule))
9695             error ("Invalid charset-list");
9696           charset_list = Vemacs_mule_charset_list;
9697         }
9698       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9699         if (max_charset_id < XFASTINT (XCAR (tail)))
9700           max_charset_id = XFASTINT (XCAR (tail));
9701     }
9702   else
9703     {
9704       charset_list = Fcopy_sequence (charset_list);
9705       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9706         {
9707           struct charset *charset;
9708
9709           val = XCAR (tail);
9710           CHECK_CHARSET_GET_CHARSET (val, charset);
9711           if (EQ (coding_type, Qiso_2022)
9712               ? CHARSET_ISO_FINAL (charset) < 0
9713               : EQ (coding_type, Qemacs_mule)
9714               ? CHARSET_EMACS_MULE_ID (charset) < 0
9715               : 0)
9716             error ("Can't handle charset `%s'",
9717                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9718
9719           XSETCAR (tail, make_number (charset->id));
9720           if (max_charset_id < charset->id)
9721             max_charset_id = charset->id;
9722         }
9723     }
9724   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9725
9726   safe_charsets = make_uninit_string (max_charset_id + 1);
9727   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9728   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9729     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9730   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9731
9732   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9733
9734   val = args[coding_arg_decode_translation_table];
9735   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9736     CHECK_SYMBOL (val);
9737   CODING_ATTR_DECODE_TBL (attrs) = val;
9738
9739   val = args[coding_arg_encode_translation_table];
9740   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9741     CHECK_SYMBOL (val);
9742   CODING_ATTR_ENCODE_TBL (attrs) = val;
9743
9744   val = args[coding_arg_post_read_conversion];
9745   CHECK_SYMBOL (val);
9746   CODING_ATTR_POST_READ (attrs) = val;
9747
9748   val = args[coding_arg_pre_write_conversion];
9749   CHECK_SYMBOL (val);
9750   CODING_ATTR_PRE_WRITE (attrs) = val;
9751
9752   val = args[coding_arg_default_char];
9753   if (NILP (val))
9754     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9755   else
9756     {
9757       CHECK_CHARACTER (val);
9758       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9759     }
9760
9761   val = args[coding_arg_for_unibyte];
9762   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9763
9764   val = args[coding_arg_plist];
9765   CHECK_LIST (val);
9766   CODING_ATTR_PLIST (attrs) = val;
9767
9768   if (EQ (coding_type, Qcharset))
9769     {
9770       /* Generate a lisp vector of 256 elements.  Each element is nil,
9771          integer, or a list of charset IDs.
9772
9773          If Nth element is nil, the byte code N is invalid in this
9774          coding system.
9775
9776          If Nth element is a number NUM, N is the first byte of a
9777          charset whose ID is NUM.
9778
9779          If Nth element is a list of charset IDs, N is the first byte
9780          of one of them.  The list is sorted by dimensions of the
9781          charsets.  A charset of smaller dimension comes firtst. */
9782       val = Fmake_vector (make_number (256), Qnil);
9783
9784       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9785         {
9786           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9787           int dim = CHARSET_DIMENSION (charset);
9788           int idx = (dim - 1) * 4;
9789
9790           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9791             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9792
9793           for (i = charset->code_space[idx];
9794                i <= charset->code_space[idx + 1]; i++)
9795             {
9796               Lisp_Object tmp, tmp2;
9797               int dim2;
9798
9799               tmp = AREF (val, i);
9800               if (NILP (tmp))
9801                 tmp = XCAR (tail);
9802               else if (NUMBERP (tmp))
9803                 {
9804                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9805                   if (dim < dim2)
9806                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9807                   else
9808                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9809                 }
9810               else
9811                 {
9812                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9813                     {
9814                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9815                       if (dim < dim2)
9816                         break;
9817                     }
9818                   if (NILP (tmp2))
9819                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9820                   else
9821                     {
9822                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9823                       XSETCAR (tmp2, XCAR (tail));
9824                     }
9825                 }
9826               ASET (val, i, tmp);
9827             }
9828         }
9829       ASET (attrs, coding_attr_charset_valids, val);
9830       category = coding_category_charset;
9831     }
9832   else if (EQ (coding_type, Qccl))
9833     {
9834       Lisp_Object valids;
9835
9836       if (nargs < coding_arg_ccl_max)
9837         goto short_args;
9838
9839       val = args[coding_arg_ccl_decoder];
9840       CHECK_CCL_PROGRAM (val);
9841       if (VECTORP (val))
9842         val = Fcopy_sequence (val);
9843       ASET (attrs, coding_attr_ccl_decoder, val);
9844
9845       val = args[coding_arg_ccl_encoder];
9846       CHECK_CCL_PROGRAM (val);
9847       if (VECTORP (val))
9848         val = Fcopy_sequence (val);
9849       ASET (attrs, coding_attr_ccl_encoder, val);
9850
9851       val = args[coding_arg_ccl_valids];
9852       valids = Fmake_string (make_number (256), make_number (0));
9853       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9854         {
9855           int from, to;
9856
9857           val = Fcar (tail);
9858           if (INTEGERP (val))
9859             {
9860               from = to = XINT (val);
9861               if (from < 0 || from > 255)
9862                 args_out_of_range_3 (val, make_number (0), make_number (255));
9863             }
9864           else
9865             {
9866               CHECK_CONS (val);
9867               CHECK_NATNUM_CAR (val);
9868               CHECK_NATNUM_CDR (val);
9869               from = XINT (XCAR (val));
9870               if (from > 255)
9871                 args_out_of_range_3 (XCAR (val),
9872                                      make_number (0), make_number (255));
9873               to = XINT (XCDR (val));
9874               if (to < from || to > 255)
9875                 args_out_of_range_3 (XCDR (val),
9876                                      XCAR (val), make_number (255));
9877             }
9878           for (i = from; i <= to; i++)
9879             SSET (valids, i, 1);
9880         }
9881       ASET (attrs, coding_attr_ccl_valids, valids);
9882
9883       category = coding_category_ccl;
9884     }
9885   else if (EQ (coding_type, Qutf_16))
9886     {
9887       Lisp_Object bom, endian;
9888
9889       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9890
9891       if (nargs < coding_arg_utf16_max)
9892         goto short_args;
9893
9894       bom = args[coding_arg_utf16_bom];
9895       if (! NILP (bom) && ! EQ (bom, Qt))
9896         {
9897           CHECK_CONS (bom);
9898           val = XCAR (bom);
9899           CHECK_CODING_SYSTEM (val);
9900           val = XCDR (bom);
9901           CHECK_CODING_SYSTEM (val);
9902         }
9903       ASET (attrs, coding_attr_utf_bom, bom);
9904
9905       endian = args[coding_arg_utf16_endian];
9906       CHECK_SYMBOL (endian);
9907       if (NILP (endian))
9908         endian = Qbig;
9909       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9910         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9911       ASET (attrs, coding_attr_utf_16_endian, endian);
9912
9913       category = (CONSP (bom)
9914                   ? coding_category_utf_16_auto
9915                   : NILP (bom)
9916                   ? (EQ (endian, Qbig)
9917                      ? coding_category_utf_16_be_nosig
9918                      : coding_category_utf_16_le_nosig)
9919                   : (EQ (endian, Qbig)
9920                      ? coding_category_utf_16_be
9921                      : coding_category_utf_16_le));
9922     }
9923   else if (EQ (coding_type, Qiso_2022))
9924     {
9925       Lisp_Object initial, reg_usage, request, flags;
9926       int i;
9927
9928       if (nargs < coding_arg_iso2022_max)
9929         goto short_args;
9930
9931       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9932       CHECK_VECTOR (initial);
9933       for (i = 0; i < 4; i++)
9934         {
9935           val = Faref (initial, make_number (i));
9936           if (! NILP (val))
9937             {
9938               struct charset *charset;
9939
9940               CHECK_CHARSET_GET_CHARSET (val, charset);
9941               ASET (initial, i, make_number (CHARSET_ID (charset)));
9942               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9943                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9944             }
9945           else
9946             ASET (initial, i, make_number (-1));
9947         }
9948
9949       reg_usage = args[coding_arg_iso2022_reg_usage];
9950       CHECK_CONS (reg_usage);
9951       CHECK_NUMBER_CAR (reg_usage);
9952       CHECK_NUMBER_CDR (reg_usage);
9953
9954       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9955       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9956         {
9957           int id;
9958           Lisp_Object tmp;
9959
9960           val = Fcar (tail);
9961           CHECK_CONS (val);
9962           tmp = XCAR (val);
9963           CHECK_CHARSET_GET_ID (tmp, id);
9964           CHECK_NATNUM_CDR (val);
9965           if (XINT (XCDR (val)) >= 4)
9966             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9967           XSETCAR (val, make_number (id));
9968         }
9969
9970       flags = args[coding_arg_iso2022_flags];
9971       CHECK_NATNUM (flags);
9972       i = XINT (flags);
9973       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9974         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9975
9976       ASET (attrs, coding_attr_iso_initial, initial);
9977       ASET (attrs, coding_attr_iso_usage, reg_usage);
9978       ASET (attrs, coding_attr_iso_request, request);
9979       ASET (attrs, coding_attr_iso_flags, flags);
9980       setup_iso_safe_charsets (attrs);
9981
9982       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9983         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9984                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9985                     ? coding_category_iso_7_else
9986                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9987                     ? coding_category_iso_7
9988                     : coding_category_iso_7_tight);
9989       else
9990         {
9991           int id = XINT (AREF (initial, 1));
9992
9993           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9994                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9995                        || id < 0)
9996                       ? coding_category_iso_8_else
9997                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9998                       ? coding_category_iso_8_1
9999                       : coding_category_iso_8_2);
10000         }
10001       if (category != coding_category_iso_8_1
10002           && category != coding_category_iso_8_2)
10003         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
10004     }
10005   else if (EQ (coding_type, Qemacs_mule))
10006     {
10007       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
10008         ASET (attrs, coding_attr_emacs_mule_full, Qt);
10009       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10010       category = coding_category_emacs_mule;
10011     }
10012   else if (EQ (coding_type, Qshift_jis))
10013     {
10014
10015       struct charset *charset;
10016
10017       if (XINT (Flength (charset_list)) != 3
10018           && XINT (Flength (charset_list)) != 4)
10019         error ("There should be three or four charsets");
10020
10021       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10022       if (CHARSET_DIMENSION (charset) != 1)
10023         error ("Dimension of charset %s is not one",
10024                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10025       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10026         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10027
10028       charset_list = XCDR (charset_list);
10029       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10030       if (CHARSET_DIMENSION (charset) != 1)
10031         error ("Dimension of charset %s is not one",
10032                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10033
10034       charset_list = XCDR (charset_list);
10035       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10036       if (CHARSET_DIMENSION (charset) != 2)
10037         error ("Dimension of charset %s is not two",
10038                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10039
10040       charset_list = XCDR (charset_list);
10041       if (! NILP (charset_list))
10042         {
10043           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10044           if (CHARSET_DIMENSION (charset) != 2)
10045             error ("Dimension of charset %s is not two",
10046                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10047         }
10048
10049       category = coding_category_sjis;
10050       Vsjis_coding_system = name;
10051     }
10052   else if (EQ (coding_type, Qbig5))
10053     {
10054       struct charset *charset;
10055
10056       if (XINT (Flength (charset_list)) != 2)
10057         error ("There should be just two charsets");
10058
10059       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10060       if (CHARSET_DIMENSION (charset) != 1)
10061         error ("Dimension of charset %s is not one",
10062                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10063       if (CHARSET_ASCII_COMPATIBLE_P (charset))
10064         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10065
10066       charset_list = XCDR (charset_list);
10067       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
10068       if (CHARSET_DIMENSION (charset) != 2)
10069         error ("Dimension of charset %s is not two",
10070                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
10071
10072       category = coding_category_big5;
10073       Vbig5_coding_system = name;
10074     }
10075   else if (EQ (coding_type, Qraw_text))
10076     {
10077       category = coding_category_raw_text;
10078       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10079     }
10080   else if (EQ (coding_type, Qutf_8))
10081     {
10082       Lisp_Object bom;
10083
10084       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
10085
10086       if (nargs < coding_arg_utf8_max)
10087         goto short_args;
10088
10089       bom = args[coding_arg_utf8_bom];
10090       if (! NILP (bom) && ! EQ (bom, Qt))
10091         {
10092           CHECK_CONS (bom);
10093           val = XCAR (bom);
10094           CHECK_CODING_SYSTEM (val);
10095           val = XCDR (bom);
10096           CHECK_CODING_SYSTEM (val);
10097         }
10098       ASET (attrs, coding_attr_utf_bom, bom);
10099
10100       category = (CONSP (bom) ? coding_category_utf_8_auto
10101                   : NILP (bom) ? coding_category_utf_8_nosig
10102                   : coding_category_utf_8_sig);
10103     }
10104   else if (EQ (coding_type, Qundecided))
10105     category = coding_category_undecided;
10106   else
10107     error ("Invalid coding system type: %s",
10108            SDATA (SYMBOL_NAME (coding_type)));
10109
10110   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10111   CODING_ATTR_PLIST (attrs)
10112     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10113                                 CODING_ATTR_PLIST (attrs)));
10114   CODING_ATTR_PLIST (attrs)
10115     = Fcons (QCascii_compatible_p,
10116              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10117                     CODING_ATTR_PLIST (attrs)));
10118
10119   eol_type = args[coding_arg_eol_type];
10120   if (! NILP (eol_type)
10121       && ! EQ (eol_type, Qunix)
10122       && ! EQ (eol_type, Qdos)
10123       && ! EQ (eol_type, Qmac))
10124     error ("Invalid eol-type");
10125
10126   aliases = Fcons (name, Qnil);
10127
10128   if (NILP (eol_type))
10129     {
10130       eol_type = make_subsidiaries (name);
10131       for (i = 0; i < 3; i++)
10132         {
10133           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10134
10135           this_name = AREF (eol_type, i);
10136           this_aliases = Fcons (this_name, Qnil);
10137           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10138           this_spec = Fmake_vector (make_number (3), attrs);
10139           ASET (this_spec, 1, this_aliases);
10140           ASET (this_spec, 2, this_eol_type);
10141           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10142           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10143           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10144           if (NILP (val))
10145             Vcoding_system_alist
10146               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10147                        Vcoding_system_alist);
10148         }
10149     }
10150
10151   spec_vec = Fmake_vector (make_number (3), attrs);
10152   ASET (spec_vec, 1, aliases);
10153   ASET (spec_vec, 2, eol_type);
10154
10155   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10156   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10157   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10158   if (NILP (val))
10159     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10160                                   Vcoding_system_alist);
10161
10162   {
10163     int id = coding_categories[category].id;
10164
10165     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10166       setup_coding_system (name, &coding_categories[category]);
10167   }
10168
10169   return Qnil;
10170
10171  short_args:
10172   return Fsignal (Qwrong_number_of_arguments,
10173                   Fcons (intern ("define-coding-system-internal"),
10174                          make_number (nargs)));
10175 }
10176
10177
10178 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10179        3, 3, 0,
10180        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10181   (coding_system, prop, val)
10182      Lisp_Object coding_system, prop, val;
10183 {
10184   Lisp_Object spec, attrs;
10185
10186   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10187   attrs = AREF (spec, 0);
10188   if (EQ (prop, QCmnemonic))
10189     {
10190       if (! STRINGP (val))
10191         CHECK_CHARACTER (val);
10192       CODING_ATTR_MNEMONIC (attrs) = val;
10193     }
10194   else if (EQ (prop, QCdefault_char))
10195     {
10196       if (NILP (val))
10197         val = make_number (' ');
10198       else
10199         CHECK_CHARACTER (val);
10200       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10201     }
10202   else if (EQ (prop, QCdecode_translation_table))
10203     {
10204       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10205         CHECK_SYMBOL (val);
10206       CODING_ATTR_DECODE_TBL (attrs) = val;
10207     }
10208   else if (EQ (prop, QCencode_translation_table))
10209     {
10210       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10211         CHECK_SYMBOL (val);
10212       CODING_ATTR_ENCODE_TBL (attrs) = val;
10213     }
10214   else if (EQ (prop, QCpost_read_conversion))
10215     {
10216       CHECK_SYMBOL (val);
10217       CODING_ATTR_POST_READ (attrs) = val;
10218     }
10219   else if (EQ (prop, QCpre_write_conversion))
10220     {
10221       CHECK_SYMBOL (val);
10222       CODING_ATTR_PRE_WRITE (attrs) = val;
10223     }
10224   else if (EQ (prop, QCascii_compatible_p))
10225     {
10226       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10227     }
10228
10229   CODING_ATTR_PLIST (attrs)
10230     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10231   return val;
10232 }
10233
10234
10235 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10236        Sdefine_coding_system_alias, 2, 2, 0,
10237        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10238      (alias, coding_system)
10239      Lisp_Object alias, coding_system;
10240 {
10241   Lisp_Object spec, aliases, eol_type, val;
10242
10243   CHECK_SYMBOL (alias);
10244   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10245   aliases = AREF (spec, 1);
10246   /* ALIASES should be a list of length more than zero, and the first
10247      element is a base coding system.  Append ALIAS at the tail of the
10248      list.  */
10249   while (!NILP (XCDR (aliases)))
10250     aliases = XCDR (aliases);
10251   XSETCDR (aliases, Fcons (alias, Qnil));
10252
10253   eol_type = AREF (spec, 2);
10254   if (VECTORP (eol_type))
10255     {
10256       Lisp_Object subsidiaries;
10257       int i;
10258
10259       subsidiaries = make_subsidiaries (alias);
10260       for (i = 0; i < 3; i++)
10261         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10262                                      AREF (eol_type, i));
10263     }
10264
10265   Fputhash (alias, spec, Vcoding_system_hash_table);
10266   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10267   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10268   if (NILP (val))
10269     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10270                                   Vcoding_system_alist);
10271
10272   return Qnil;
10273 }
10274
10275 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10276        1, 1, 0,
10277        doc: /* Return the base of CODING-SYSTEM.
10278 Any alias or subsidiary coding system is not a base coding system.  */)
10279   (coding_system)
10280      Lisp_Object coding_system;
10281 {
10282   Lisp_Object spec, attrs;
10283
10284   if (NILP (coding_system))
10285     return (Qno_conversion);
10286   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10287   attrs = AREF (spec, 0);
10288   return CODING_ATTR_BASE_NAME (attrs);
10289 }
10290
10291 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10292        1, 1, 0,
10293        doc: "Return the property list of CODING-SYSTEM.")
10294      (coding_system)
10295      Lisp_Object coding_system;
10296 {
10297   Lisp_Object spec, attrs;
10298
10299   if (NILP (coding_system))
10300     coding_system = Qno_conversion;
10301   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10302   attrs = AREF (spec, 0);
10303   return CODING_ATTR_PLIST (attrs);
10304 }
10305
10306
10307 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10308        1, 1, 0,
10309        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10310      (coding_system)
10311      Lisp_Object coding_system;
10312 {
10313   Lisp_Object spec;
10314
10315   if (NILP (coding_system))
10316     coding_system = Qno_conversion;
10317   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10318   return AREF (spec, 1);
10319 }
10320
10321 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10322        Scoding_system_eol_type, 1, 1, 0,
10323        doc: /* Return eol-type of CODING-SYSTEM.
10324 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10325
10326 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10327 and CR respectively.
10328
10329 A vector value indicates that a format of end-of-line should be
10330 detected automatically.  Nth element of the vector is the subsidiary
10331 coding system whose eol-type is N.  */)
10332      (coding_system)
10333      Lisp_Object coding_system;
10334 {
10335   Lisp_Object spec, eol_type;
10336   int n;
10337
10338   if (NILP (coding_system))
10339     coding_system = Qno_conversion;
10340   if (! CODING_SYSTEM_P (coding_system))
10341     return Qnil;
10342   spec = CODING_SYSTEM_SPEC (coding_system);
10343   eol_type = AREF (spec, 2);
10344   if (VECTORP (eol_type))
10345     return Fcopy_sequence (eol_type);
10346   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10347   return make_number (n);
10348 }
10349
10350 #endif /* emacs */
10351
10352 \f
10353 /*** 9. Post-amble ***/
10354
10355 void
10356 init_coding_once ()
10357 {
10358   int i;
10359
10360   for (i = 0; i < coding_category_max; i++)
10361     {
10362       coding_categories[i].id = -1;
10363       coding_priorities[i] = i;
10364     }
10365
10366   /* ISO2022 specific initialize routine.  */
10367   for (i = 0; i < 0x20; i++)
10368     iso_code_class[i] = ISO_control_0;
10369   for (i = 0x21; i < 0x7F; i++)
10370     iso_code_class[i] = ISO_graphic_plane_0;
10371   for (i = 0x80; i < 0xA0; i++)
10372     iso_code_class[i] = ISO_control_1;
10373   for (i = 0xA1; i < 0xFF; i++)
10374     iso_code_class[i] = ISO_graphic_plane_1;
10375   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10376   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10377   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10378   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10379   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10380   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10381   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10382   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10383   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10384
10385   for (i = 0; i < 256; i++)
10386     {
10387       emacs_mule_bytes[i] = 1;
10388     }
10389   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10390   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10391   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10392   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10393 }
10394
10395 #ifdef emacs
10396
10397 void
10398 syms_of_coding ()
10399 {
10400   staticpro (&Vcoding_system_hash_table);
10401   {
10402     Lisp_Object args[2];
10403     args[0] = QCtest;
10404     args[1] = Qeq;
10405     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10406   }
10407
10408   staticpro (&Vsjis_coding_system);
10409   Vsjis_coding_system = Qnil;
10410
10411   staticpro (&Vbig5_coding_system);
10412   Vbig5_coding_system = Qnil;
10413
10414   staticpro (&Vcode_conversion_reused_workbuf);
10415   Vcode_conversion_reused_workbuf = Qnil;
10416
10417   staticpro (&Vcode_conversion_workbuf_name);
10418   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10419
10420   reused_workbuf_in_use = 0;
10421
10422   DEFSYM (Qcharset, "charset");
10423   DEFSYM (Qtarget_idx, "target-idx");
10424   DEFSYM (Qcoding_system_history, "coding-system-history");
10425   Fset (Qcoding_system_history, Qnil);
10426
10427   /* Target FILENAME is the first argument.  */
10428   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10429   /* Target FILENAME is the third argument.  */
10430   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10431
10432   DEFSYM (Qcall_process, "call-process");
10433   /* Target PROGRAM is the first argument.  */
10434   Fput (Qcall_process, Qtarget_idx, make_number (0));
10435
10436   DEFSYM (Qcall_process_region, "call-process-region");
10437   /* Target PROGRAM is the third argument.  */
10438   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10439
10440   DEFSYM (Qstart_process, "start-process");
10441   /* Target PROGRAM is the third argument.  */
10442   Fput (Qstart_process, Qtarget_idx, make_number (2));
10443
10444   DEFSYM (Qopen_network_stream, "open-network-stream");
10445   /* Target SERVICE is the fourth argument.  */
10446   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10447
10448   DEFSYM (Qcoding_system, "coding-system");
10449   DEFSYM (Qcoding_aliases, "coding-aliases");
10450
10451   DEFSYM (Qeol_type, "eol-type");
10452   DEFSYM (Qunix, "unix");
10453   DEFSYM (Qdos, "dos");
10454
10455   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10456   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10457   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10458   DEFSYM (Qdefault_char, "default-char");
10459   DEFSYM (Qundecided, "undecided");
10460   DEFSYM (Qno_conversion, "no-conversion");
10461   DEFSYM (Qraw_text, "raw-text");
10462
10463   DEFSYM (Qiso_2022, "iso-2022");
10464
10465   DEFSYM (Qutf_8, "utf-8");
10466   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10467
10468   DEFSYM (Qutf_16, "utf-16");
10469   DEFSYM (Qbig, "big");
10470   DEFSYM (Qlittle, "little");
10471
10472   DEFSYM (Qshift_jis, "shift-jis");
10473   DEFSYM (Qbig5, "big5");
10474
10475   DEFSYM (Qcoding_system_p, "coding-system-p");
10476
10477   DEFSYM (Qcoding_system_error, "coding-system-error");
10478   Fput (Qcoding_system_error, Qerror_conditions,
10479         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10480   Fput (Qcoding_system_error, Qerror_message,
10481         make_pure_c_string ("Invalid coding system"));
10482
10483   /* Intern this now in case it isn't already done.
10484      Setting this variable twice is harmless.
10485      But don't staticpro it here--that is done in alloc.c.  */
10486   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10487
10488   DEFSYM (Qtranslation_table, "translation-table");
10489   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10490   DEFSYM (Qtranslation_table_id, "translation-table-id");
10491   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10492   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10493
10494   DEFSYM (Qvalid_codes, "valid-codes");
10495
10496   DEFSYM (Qemacs_mule, "emacs-mule");
10497
10498   DEFSYM (QCcategory, ":category");
10499   DEFSYM (QCmnemonic, ":mnemonic");
10500   DEFSYM (QCdefault_char, ":default-char");
10501   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10502   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10503   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10504   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10505   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10506
10507   Vcoding_category_table
10508     = Fmake_vector (make_number (coding_category_max), Qnil);
10509   staticpro (&Vcoding_category_table);
10510   /* Followings are target of code detection.  */
10511   ASET (Vcoding_category_table, coding_category_iso_7,
10512         intern_c_string ("coding-category-iso-7"));
10513   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10514         intern_c_string ("coding-category-iso-7-tight"));
10515   ASET (Vcoding_category_table, coding_category_iso_8_1,
10516         intern_c_string ("coding-category-iso-8-1"));
10517   ASET (Vcoding_category_table, coding_category_iso_8_2,
10518         intern_c_string ("coding-category-iso-8-2"));
10519   ASET (Vcoding_category_table, coding_category_iso_7_else,
10520         intern_c_string ("coding-category-iso-7-else"));
10521   ASET (Vcoding_category_table, coding_category_iso_8_else,
10522         intern_c_string ("coding-category-iso-8-else"));
10523   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10524         intern_c_string ("coding-category-utf-8-auto"));
10525   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10526         intern_c_string ("coding-category-utf-8"));
10527   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10528         intern_c_string ("coding-category-utf-8-sig"));
10529   ASET (Vcoding_category_table, coding_category_utf_16_be,
10530         intern_c_string ("coding-category-utf-16-be"));
10531   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10532         intern_c_string ("coding-category-utf-16-auto"));
10533   ASET (Vcoding_category_table, coding_category_utf_16_le,
10534         intern_c_string ("coding-category-utf-16-le"));
10535   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10536         intern_c_string ("coding-category-utf-16-be-nosig"));
10537   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10538         intern_c_string ("coding-category-utf-16-le-nosig"));
10539   ASET (Vcoding_category_table, coding_category_charset,
10540         intern_c_string ("coding-category-charset"));
10541   ASET (Vcoding_category_table, coding_category_sjis,
10542         intern_c_string ("coding-category-sjis"));
10543   ASET (Vcoding_category_table, coding_category_big5,
10544         intern_c_string ("coding-category-big5"));
10545   ASET (Vcoding_category_table, coding_category_ccl,
10546         intern_c_string ("coding-category-ccl"));
10547   ASET (Vcoding_category_table, coding_category_emacs_mule,
10548         intern_c_string ("coding-category-emacs-mule"));
10549   /* Followings are NOT target of code detection.  */
10550   ASET (Vcoding_category_table, coding_category_raw_text,
10551         intern_c_string ("coding-category-raw-text"));
10552   ASET (Vcoding_category_table, coding_category_undecided,
10553         intern_c_string ("coding-category-undecided"));
10554
10555   DEFSYM (Qinsufficient_source, "insufficient-source");
10556   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10557   DEFSYM (Qinvalid_source, "invalid-source");
10558   DEFSYM (Qinterrupted, "interrupted");
10559   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10560   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10561
10562   defsubr (&Scoding_system_p);
10563   defsubr (&Sread_coding_system);
10564   defsubr (&Sread_non_nil_coding_system);
10565   defsubr (&Scheck_coding_system);
10566   defsubr (&Sdetect_coding_region);
10567   defsubr (&Sdetect_coding_string);
10568   defsubr (&Sfind_coding_systems_region_internal);
10569   defsubr (&Sunencodable_char_position);
10570   defsubr (&Scheck_coding_systems_region);
10571   defsubr (&Sdecode_coding_region);
10572   defsubr (&Sencode_coding_region);
10573   defsubr (&Sdecode_coding_string);
10574   defsubr (&Sencode_coding_string);
10575   defsubr (&Sdecode_sjis_char);
10576   defsubr (&Sencode_sjis_char);
10577   defsubr (&Sdecode_big5_char);
10578   defsubr (&Sencode_big5_char);
10579   defsubr (&Sset_terminal_coding_system_internal);
10580   defsubr (&Sset_safe_terminal_coding_system_internal);
10581   defsubr (&Sterminal_coding_system);
10582   defsubr (&Sset_keyboard_coding_system_internal);
10583   defsubr (&Skeyboard_coding_system);
10584   defsubr (&Sfind_operation_coding_system);
10585   defsubr (&Sset_coding_system_priority);
10586   defsubr (&Sdefine_coding_system_internal);
10587   defsubr (&Sdefine_coding_system_alias);
10588   defsubr (&Scoding_system_put);
10589   defsubr (&Scoding_system_base);
10590   defsubr (&Scoding_system_plist);
10591   defsubr (&Scoding_system_aliases);
10592   defsubr (&Scoding_system_eol_type);
10593   defsubr (&Scoding_system_priority_list);
10594
10595   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10596                doc: /* List of coding systems.
10597
10598 Do not alter the value of this variable manually.  This variable should be
10599 updated by the functions `define-coding-system' and
10600 `define-coding-system-alias'.  */);
10601   Vcoding_system_list = Qnil;
10602
10603   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10604                doc: /* Alist of coding system names.
10605 Each element is one element list of coding system name.
10606 This variable is given to `completing-read' as COLLECTION argument.
10607
10608 Do not alter the value of this variable manually.  This variable should be
10609 updated by the functions `make-coding-system' and
10610 `define-coding-system-alias'.  */);
10611   Vcoding_system_alist = Qnil;
10612
10613   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10614                doc: /* List of coding-categories (symbols) ordered by priority.
10615
10616 On detecting a coding system, Emacs tries code detection algorithms
10617 associated with each coding-category one by one in this order.  When
10618 one algorithm agrees with a byte sequence of source text, the coding
10619 system bound to the corresponding coding-category is selected.
10620
10621 Don't modify this variable directly, but use `set-coding-priority'.  */);
10622   {
10623     int i;
10624
10625     Vcoding_category_list = Qnil;
10626     for (i = coding_category_max - 1; i >= 0; i--)
10627       Vcoding_category_list
10628         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10629                  Vcoding_category_list);
10630   }
10631
10632   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10633                doc: /* Specify the coding system for read operations.
10634 It is useful to bind this variable with `let', but do not set it globally.
10635 If the value is a coding system, it is used for decoding on read operation.
10636 If not, an appropriate element is used from one of the coding system alists.
10637 There are three such tables: `file-coding-system-alist',
10638 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10639   Vcoding_system_for_read = Qnil;
10640
10641   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10642                doc: /* Specify the coding system for write operations.
10643 Programs bind this variable with `let', but you should not set it globally.
10644 If the value is a coding system, it is used for encoding of output,
10645 when writing it to a file and when sending it to a file or subprocess.
10646
10647 If this does not specify a coding system, an appropriate element
10648 is used from one of the coding system alists.
10649 There are three such tables: `file-coding-system-alist',
10650 `process-coding-system-alist', and `network-coding-system-alist'.
10651 For output to files, if the above procedure does not specify a coding system,
10652 the value of `buffer-file-coding-system' is used.  */);
10653   Vcoding_system_for_write = Qnil;
10654
10655   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10656                doc: /*
10657 Coding system used in the latest file or process I/O.  */);
10658   Vlast_coding_system_used = Qnil;
10659
10660   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10661                doc: /*
10662 Error status of the last code conversion.
10663
10664 When an error was detected in the last code conversion, this variable
10665 is set to one of the following symbols.
10666   `insufficient-source'
10667   `inconsistent-eol'
10668   `invalid-source'
10669   `interrupted'
10670   `insufficient-memory'
10671 When no error was detected, the value doesn't change.  So, to check
10672 the error status of a code conversion by this variable, you must
10673 explicitly set this variable to nil before performing code
10674 conversion.  */);
10675   Vlast_code_conversion_error = Qnil;
10676
10677   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10678                doc: /*
10679 *Non-nil means always inhibit code conversion of end-of-line format.
10680 See info node `Coding Systems' and info node `Text and Binary' concerning
10681 such conversion.  */);
10682   inhibit_eol_conversion = 0;
10683
10684   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10685                doc: /*
10686 Non-nil means process buffer inherits coding system of process output.
10687 Bind it to t if the process output is to be treated as if it were a file
10688 read from some filesystem.  */);
10689   inherit_process_coding_system = 0;
10690
10691   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10692                doc: /*
10693 Alist to decide a coding system to use for a file I/O operation.
10694 The format is ((PATTERN . VAL) ...),
10695 where PATTERN is a regular expression matching a file name,
10696 VAL is a coding system, a cons of coding systems, or a function symbol.
10697 If VAL is a coding system, it is used for both decoding and encoding
10698 the file contents.
10699 If VAL is a cons of coding systems, the car part is used for decoding,
10700 and the cdr part is used for encoding.
10701 If VAL is a function symbol, the function must return a coding system
10702 or a cons of coding systems which are used as above.  The function is
10703 called with an argument that is a list of the arguments with which
10704 `find-operation-coding-system' was called.  If the function can't decide
10705 a coding system, it can return `undecided' so that the normal
10706 code-detection is performed.
10707
10708 See also the function `find-operation-coding-system'
10709 and the variable `auto-coding-alist'.  */);
10710   Vfile_coding_system_alist = Qnil;
10711
10712   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10713                doc: /*
10714 Alist to decide a coding system to use for a process I/O operation.
10715 The format is ((PATTERN . VAL) ...),
10716 where PATTERN is a regular expression matching a program name,
10717 VAL is a coding system, a cons of coding systems, or a function symbol.
10718 If VAL is a coding system, it is used for both decoding what received
10719 from the program and encoding what sent to the program.
10720 If VAL is a cons of coding systems, the car part is used for decoding,
10721 and the cdr part is used for encoding.
10722 If VAL is a function symbol, the function must return a coding system
10723 or a cons of coding systems which are used as above.
10724
10725 See also the function `find-operation-coding-system'.  */);
10726   Vprocess_coding_system_alist = Qnil;
10727
10728   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10729                doc: /*
10730 Alist to decide a coding system to use for a network I/O operation.
10731 The format is ((PATTERN . VAL) ...),
10732 where PATTERN is a regular expression matching a network service name
10733 or is a port number to connect to,
10734 VAL is a coding system, a cons of coding systems, or a function symbol.
10735 If VAL is a coding system, it is used for both decoding what received
10736 from the network stream and encoding what sent to the network stream.
10737 If VAL is a cons of coding systems, the car part is used for decoding,
10738 and the cdr part is used for encoding.
10739 If VAL is a function symbol, the function must return a coding system
10740 or a cons of coding systems which are used as above.
10741
10742 See also the function `find-operation-coding-system'.  */);
10743   Vnetwork_coding_system_alist = Qnil;
10744
10745   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10746                doc: /* Coding system to use with system messages.
10747 Also used for decoding keyboard input on X Window system.  */);
10748   Vlocale_coding_system = Qnil;
10749
10750   /* The eol mnemonics are reset in startup.el system-dependently.  */
10751   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10752                doc: /*
10753 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10754   eol_mnemonic_unix = make_pure_c_string (":");
10755
10756   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10757                doc: /*
10758 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10759   eol_mnemonic_dos = make_pure_c_string ("\\");
10760
10761   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10762                doc: /*
10763 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10764   eol_mnemonic_mac = make_pure_c_string ("/");
10765
10766   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10767                doc: /*
10768 *String displayed in mode line when end-of-line format is not yet determined.  */);
10769   eol_mnemonic_undecided = make_pure_c_string (":");
10770
10771   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10772                doc: /*
10773 *Non-nil enables character translation while encoding and decoding.  */);
10774   Venable_character_translation = Qt;
10775
10776   DEFVAR_LISP ("standard-translation-table-for-decode",
10777                &Vstandard_translation_table_for_decode,
10778                doc: /* Table for translating characters while decoding.  */);
10779   Vstandard_translation_table_for_decode = Qnil;
10780
10781   DEFVAR_LISP ("standard-translation-table-for-encode",
10782                &Vstandard_translation_table_for_encode,
10783                doc: /* Table for translating characters while encoding.  */);
10784   Vstandard_translation_table_for_encode = Qnil;
10785
10786   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10787                doc: /* Alist of charsets vs revision numbers.
10788 While encoding, if a charset (car part of an element) is found,
10789 designate it with the escape sequence identifying revision (cdr part
10790 of the element).  */);
10791   Vcharset_revision_table = Qnil;
10792
10793   DEFVAR_LISP ("default-process-coding-system",
10794                &Vdefault_process_coding_system,
10795                doc: /* Cons of coding systems used for process I/O by default.
10796 The car part is used for decoding a process output,
10797 the cdr part is used for encoding a text to be sent to a process.  */);
10798   Vdefault_process_coding_system = Qnil;
10799
10800   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10801                doc: /*
10802 Table of extra Latin codes in the range 128..159 (inclusive).
10803 This is a vector of length 256.
10804 If Nth element is non-nil, the existence of code N in a file
10805 \(or output of subprocess) doesn't prevent it to be detected as
10806 a coding system of ISO 2022 variant which has a flag
10807 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10808 or reading output of a subprocess.
10809 Only 128th through 159th elements have a meaning.  */);
10810   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10811
10812   DEFVAR_LISP ("select-safe-coding-system-function",
10813                &Vselect_safe_coding_system_function,
10814                doc: /*
10815 Function to call to select safe coding system for encoding a text.
10816
10817 If set, this function is called to force a user to select a proper
10818 coding system which can encode the text in the case that a default
10819 coding system used in each operation can't encode the text.  The
10820 function should take care that the buffer is not modified while
10821 the coding system is being selected.
10822
10823 The default value is `select-safe-coding-system' (which see).  */);
10824   Vselect_safe_coding_system_function = Qnil;
10825
10826   DEFVAR_BOOL ("coding-system-require-warning",
10827                &coding_system_require_warning,
10828                doc: /* Internal use only.
10829 If non-nil, on writing a file, `select-safe-coding-system-function' is
10830 called even if `coding-system-for-write' is non-nil.  The command
10831 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10832   coding_system_require_warning = 0;
10833
10834
10835   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10836                &inhibit_iso_escape_detection,
10837                doc: /*
10838 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10839
10840 When Emacs reads text, it tries to detect how the text is encoded.
10841 This code detection is sensitive to escape sequences.  If Emacs sees
10842 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10843 of the ISO2022 encodings, and decodes text by the corresponding coding
10844 system (e.g. `iso-2022-7bit').
10845
10846 However, there may be a case that you want to read escape sequences in
10847 a file as is.  In such a case, you can set this variable to non-nil.
10848 Then the code detection will ignore any escape sequences, and no text is
10849 detected as encoded in some ISO-2022 encoding.  The result is that all
10850 escape sequences become visible in a buffer.
10851
10852 The default value is nil, and it is strongly recommended not to change
10853 it.  That is because many Emacs Lisp source files that contain
10854 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10855 in Emacs's distribution, and they won't be decoded correctly on
10856 reading if you suppress escape sequence detection.
10857
10858 The other way to read escape sequences in a file without decoding is
10859 to explicitly specify some coding system that doesn't use ISO-2022
10860 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10861   inhibit_iso_escape_detection = 0;
10862
10863   DEFVAR_BOOL ("inhibit-null-byte-detection",
10864                &inhibit_null_byte_detection,
10865                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10866 By default, Emacs treats it as binary data, and does not attempt to
10867 decode it.  The effect is as if you specified `no-conversion' for
10868 reading that text.
10869
10870 Set this to non-nil when a regular text happens to include null bytes.
10871 Examples are Index nodes of Info files and null-byte delimited output
10872 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10873 decode text as usual.  */);
10874   inhibit_null_byte_detection = 0;
10875
10876   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10877                doc: /* Char table for translating self-inserting characters.
10878 This is applied to the result of input methods, not their input.
10879 See also `keyboard-translate-table'.
10880
10881 Use of this variable for character code unification was rendered
10882 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10883 internal character representation.  */);
10884     Vtranslation_table_for_input = Qnil;
10885
10886   {
10887     Lisp_Object args[coding_arg_max];
10888     Lisp_Object plist[16];
10889     int i;
10890
10891     for (i = 0; i < coding_arg_max; i++)
10892       args[i] = Qnil;
10893
10894     plist[0] = intern_c_string (":name");
10895     plist[1] = args[coding_arg_name] = Qno_conversion;
10896     plist[2] = intern_c_string (":mnemonic");
10897     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10898     plist[4] = intern_c_string (":coding-type");
10899     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10900     plist[6] = intern_c_string (":ascii-compatible-p");
10901     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10902     plist[8] = intern_c_string (":default-char");
10903     plist[9] = args[coding_arg_default_char] = make_number (0);
10904     plist[10] = intern_c_string (":for-unibyte");
10905     plist[11] = args[coding_arg_for_unibyte] = Qt;
10906     plist[12] = intern_c_string (":docstring");
10907     plist[13] = make_pure_c_string ("Do no conversion.\n\
10908 \n\
10909 When you visit a file with this coding, the file is read into a\n\
10910 unibyte buffer as is, thus each byte of a file is treated as a\n\
10911 character.");
10912     plist[14] = intern_c_string (":eol-type");
10913     plist[15] = args[coding_arg_eol_type] = Qunix;
10914     args[coding_arg_plist] = Flist (16, plist);
10915     Fdefine_coding_system_internal (coding_arg_max, args);
10916
10917     plist[1] = args[coding_arg_name] = Qundecided;
10918     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10919     plist[5] = args[coding_arg_coding_type] = Qundecided;
10920     /* This is already set.
10921        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10922     plist[8] = intern_c_string (":charset-list");
10923     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10924     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10925     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10926     plist[15] = args[coding_arg_eol_type] = Qnil;
10927     args[coding_arg_plist] = Flist (16, plist);
10928     Fdefine_coding_system_internal (coding_arg_max, args);
10929   }
10930
10931   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10932
10933   {
10934     int i;
10935
10936     for (i = 0; i < coding_category_max; i++)
10937       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10938   }
10939 #if defined (MSDOS) || defined (WINDOWSNT)
10940   system_eol_type = Qdos;
10941 #else
10942   system_eol_type = Qunix;
10943 #endif
10944   staticpro (&system_eol_type);
10945 }
10946
10947 char *
10948 emacs_strerror (error_number)
10949      int error_number;
10950 {
10951   char *str;
10952
10953   synchronize_system_messages_locale ();
10954   str = strerror (error_number);
10955
10956   if (! NILP (Vlocale_coding_system))
10957     {
10958       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10959                                                       Vlocale_coding_system,
10960                                                       0);
10961       str = (char *) SDATA (dec);
10962     }
10963
10964   return str;
10965 }
10966
10967 #endif /* emacs */
10968
10969 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10970    (do not change this comment) */