src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
   61   stored in the hash table Vcoding_system_hash_table.  The conversion
   62   from coding system symbol to attributes vector is done by looking up
   63   Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
  136   encoding), we set up a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;
 166   ...;
 167
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the souce is exausted, jump
 171          to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (! __C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source exausted successfully.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size;
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source code, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf < charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that source area and destination area are
  258   overlapped, which means that we can produce an encoded text until it
  259   reaches the head of the not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292
 293 #include "lisp.h"
 294 #include "buffer.h"
 295 #include "character.h"
 296 #include "charset.h"
 297 #include "ccl.h"
 298 #include "composite.h"
 299 #include "coding.h"
 300 #include "window.h"
 301 #include "frame.h"
 302 #include "termhooks.h"
 303
  304 Lisp_Object Vcoding_system_hash_table;
  305 /* NOTE(review): by their names, the Q.../QC... objects below hold symbols.  */
  306 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
  307 Lisp_Object Qunix, Qdos;
  308 extern Lisp_Object Qmac;        /* Defined in frame.c.  */
  309 Lisp_Object Qbuffer_file_coding_system;
  310 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
  311 Lisp_Object Qdefault_char;
  312 Lisp_Object Qno_conversion, Qundecided;
  313 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
  314 Lisp_Object Qbig, Qlittle;
  315 Lisp_Object Qcoding_system_history;
  316 Lisp_Object Qvalid_codes;
  317 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
  318 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
  319 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
  320 Lisp_Object QCascii_compatible_p;
  321 /* NOTE(review): "QCdefalut_char" above looks like a typo of "default".  */
  322 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
  323 Lisp_Object Qcall_process, Qcall_process_region;
  324 Lisp_Object Qstart_process, Qopen_network_stream;
  325 Lisp_Object Qtarget_idx;
  326
  327 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
  328 Lisp_Object Qinterrupted, Qinsufficient_memory;
  329
  330 extern Lisp_Object Qcompletion_ignore_case;
  331
  332 /* If a symbol has this property, evaluate the value to define the
  333    symbol as a coding system.  */
  334 static Lisp_Object Qcoding_system_define_form;
  335
  336 int coding_system_require_warning;
  337
  338 Lisp_Object Vselect_safe_coding_system_function;
  339
  340 /* Mnemonic string for each format of end-of-line.  */
  341 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
  342 /* Mnemonic string to indicate that the format of end-of-line is not
  343    yet decided.  */
  344 Lisp_Object eol_mnemonic_undecided;
  345
  346 /* Format of end-of-line determined by the system.  This is Qunix on
  347    Unix and Mac, Qdos on DOS/Windows.
  348    This has an effect only for external encoding (i.e. for output to
  349    file and process), not for in-buffer or Lisp string encoding.  */
  350 static Lisp_Object system_eol_type;
 351
  352 #ifdef emacs
  353 /* Everything up to the matching #endif exists only if `emacs' is defined.  */
  354 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
  355
  356 Lisp_Object Qcoding_system_p, Qcoding_system_error;
  357
  358 /* Coding systems emacs-mule and raw-text are for converting only
  359    the end-of-line format.  */
  360 Lisp_Object Qemacs_mule, Qraw_text;
  361 Lisp_Object Qutf_8_emacs;
  362
  363 /* Coding systems are handed between Emacs Lisp programs and C internal
  364    routines by the following three variables.  */
  365 /* Coding system for reading files and receiving data from a process.  */
  366 Lisp_Object Vcoding_system_for_read;
  367 /* Coding system for writing files and sending data to a process.  */
  368 Lisp_Object Vcoding_system_for_write;
  369 /* Coding system actually used in the latest I/O.  */
  370 Lisp_Object Vlast_coding_system_used;
  371 /* Set to non-nil when an error is detected during code conversion.  */
  372 Lisp_Object Vlast_code_conversion_error;
  373 /* A vector of length 256 which contains information about special
  374    Latin codes (especially for dealing with Microsoft codes).  */
  375 Lisp_Object Vlatin_extra_code_table;
  376
  377 /* Flag to inhibit code conversion of end-of-line format.  */
  378 int inhibit_eol_conversion;
  379
  380 /* Flag to inhibit ISO2022 escape sequence detection.  */
  381 int inhibit_iso_escape_detection;
  382
  383 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
  384 int inherit_process_coding_system;
  385
  386 /* Coding system to be used to encode text for terminal display when
  387    the terminal coding system is nil.  */
  388 struct coding_system safe_terminal_coding;
  389
  390 Lisp_Object Vfile_coding_system_alist;
  391 Lisp_Object Vprocess_coding_system_alist;
  392 Lisp_Object Vnetwork_coding_system_alist;
  393
  394 Lisp_Object Vlocale_coding_system;
  395
  396 #endif /* emacs */
 397
  398 /* Flag telling whether translation tables are looked up during
  399    character code conversion (a Lisp_Object, not a C int).  */
  400 Lisp_Object Venable_character_translation;
  401 /* Standard translation table to look up on decoding (reading).  */
  402 Lisp_Object Vstandard_translation_table_for_decode;
  403 /* Standard translation table to look up on encoding (writing).  */
  404 Lisp_Object Vstandard_translation_table_for_encode;
  405
  406 Lisp_Object Qtranslation_table;
  407 Lisp_Object Qtranslation_table_id;
  408 Lisp_Object Qtranslation_table_for_decode;
  409 Lisp_Object Qtranslation_table_for_encode;
  410
  411 /* Alist associating charsets with revision numbers.  */
  412 static Lisp_Object Vcharset_revision_table;
  413
  414 /* Default coding systems used for process I/O.  */
  415 Lisp_Object Vdefault_process_coding_system;
  416
  417 /* Char table for translating Quail and self-inserting input.  */
  418 Lisp_Object Vtranslation_table_for_input;
  419
  420 /* Two special coding systems.  */
  421 Lisp_Object Vsjis_coding_system;
  422 Lisp_Object Vbig5_coding_system;
 423
 424 /* ISO2022 section */
  425 /* XINT of element REG of CODING's `coding_attr_iso_initial' attribute.  */
  426 #define CODING_ISO_INITIAL(coding, reg)                 \
  427   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
  428                      coding_attr_iso_initial),          \
  429                reg)))
 430
 431
 432 #define CODING_ISO_REQUEST(coding, charset_id)  \
 433   ((charset_id <= (coding)->max_charset_id      \
 434     ? (coding)->safe_charsets[charset_id]       \
 435     : -1))
 436
  437 /* Accessors for the ISO2022 state kept in CODING->spec.iso_2022.  */
  438 #define CODING_ISO_FLAGS(coding)        \
  439   ((coding)->spec.iso_2022.flags)
  440 #define CODING_ISO_DESIGNATION(coding, reg)     \
  441   ((coding)->spec.iso_2022.current_designation[reg])
  442 #define CODING_ISO_INVOCATION(coding, plane)    \
  443   ((coding)->spec.iso_2022.current_invocation[plane])
  444 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
  445   ((coding)->spec.iso_2022.single_shifting)
  446 #define CODING_ISO_BOL(coding)  \
  447   ((coding)->spec.iso_2022.bol)
  448 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
  449   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 450
  451 /* Control characters of ISO2022.  */
  452                         /* code */      /* function */
  453 #define ISO_CODE_LF     0x0A            /* line-feed */
  454 #define ISO_CODE_CR     0x0D            /* carriage-return */
  455 #define ISO_CODE_SO     0x0E            /* shift-out */
  456 #define ISO_CODE_SI     0x0F            /* shift-in */
  457 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
  458 #define ISO_CODE_ESC    0x1B            /* escape */
  459 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
  460 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
  461 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
  462 /* NOTE(review): 0x7F appears under both ISO_control_0 and ISO_0x20_or_0x7F.  */
  463 /* Every 1-byte code of ISO2022 is classified into one of the
  464    following classes.  */
  465 enum iso_code_class_type
  466   {
  467     ISO_control_0,              /* Control codes in the range
  468                                    0x00..0x1F and 0x7F, except for the
  469                                    following 4 codes.  */
  470     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
  471     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
  472     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
  473     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
  474     ISO_control_1,              /* Control codes in the range
  475                                    0x80..0x9F, except for the
  476                                    following 3 codes.  */
  477     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
  478     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
  479     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
  480     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
  481     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
  482     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
  483     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
  484   };
 485
  486 /** Each macro CODING_ISO_FLAG_XXX defines a flag bit of the
  487     `iso-flags' attribute of an iso2022 coding system.  */
  488
  489 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
  490    instead of the correct short-form sequence (e.g. ESC $ A).  */
  491 #define CODING_ISO_FLAG_LONG_FORM       0x0001
  492
  493 /* If set, reset graphic planes and registers at end-of-line to the
  494    initial state.  */
  495 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
  496
  497 /* If set, reset graphic planes and registers before any control
  498    characters to the initial state.  */
  499 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
  500
  501 /* If set, encode by 7-bit environment.  */
  502 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
  503
  504 /* If set, use locking-shift function.  */
  505 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
  506
  507 /* If set, use single-shift function.  Overwrite
  508    CODING_ISO_FLAG_LOCKING_SHIFT.  */
  509 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
  510
  511 /* If set, use designation escape sequence.  */
  512 #define CODING_ISO_FLAG_DESIGNATION     0x0040
  513
  514 /* If set, produce revision number sequence.  */
  515 #define CODING_ISO_FLAG_REVISION        0x0080
  516
  517 /* If set, produce ISO6429's direction specifying sequence.  */
  518 #define CODING_ISO_FLAG_DIRECTION       0x0100
  519
  520 /* If set, assume designation states are reset at beginning of line on
  521    output.  */
  522 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
  523
  524 /* If set, designation sequence should be placed at beginning of line
  525    on output.  */
  526 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
  527
  528 /* If set, do not encode unsafe characters on output.  */
  529 #define CODING_ISO_FLAG_SAFE            0x0800
  530
  531 /* If set, extra latin codes (128..159) are accepted as a valid code
  532    on input.  */
  533 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
  534 /* NOTE(review): the next five flags carry no description here.  */
  535 #define CODING_ISO_FLAG_COMPOSITION     0x2000
  536
  537 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
  538
  539 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
  540
  541 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
  542
  543 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
  544
  545 /* A character to be produced on output if encoding of the original
  546    character is prohibited by CODING_ISO_FLAG_SAFE.  */
  547 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 548
  549 /* UTF-8 section: accessor for CODING->spec.utf_8_bom.  */
  550 #define CODING_UTF_8_BOM(coding)        \
  551   ((coding)->spec.utf_8_bom)
  552
  553 /* UTF-16 section: accessors for the members of CODING->spec.utf_16.  */
  554 #define CODING_UTF_16_BOM(coding)       \
  555   ((coding)->spec.utf_16.bom)
  556
  557 #define CODING_UTF_16_ENDIAN(coding)    \
  558   ((coding)->spec.utf_16.endian)
  559
  560 #define CODING_UTF_16_SURROGATE(coding) \
  561   ((coding)->spec.utf_16.surrogate)
 562
 563
  564 /* CCL section: each macro reads one slot of CODING's attribute vector.  */
  565 #define CODING_CCL_DECODER(coding)      \
  566   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
  567 #define CODING_CCL_ENCODER(coding)      \
  568   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
  569 #define CODING_CCL_VALIDS(coding)                                          \
  570   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 571
  572 /* Index for each coding category in `coding_categories'.  */
  573
  574 enum coding_category
  575   {
  576     coding_category_iso_7,
  577     coding_category_iso_7_tight,
  578     coding_category_iso_8_1,
  579     coding_category_iso_8_2,
  580     coding_category_iso_7_else,
  581     coding_category_iso_8_else,
  582     coding_category_utf_8_auto,
  583     coding_category_utf_8_nosig,
  584     coding_category_utf_8_sig,
  585     coding_category_utf_16_auto,
  586     coding_category_utf_16_be,
  587     coding_category_utf_16_le,
  588     coding_category_utf_16_be_nosig,
  589     coding_category_utf_16_le_nosig,
  590     coding_category_charset,
  591     coding_category_sjis,
  592     coding_category_big5,
  593     coding_category_ccl,
  594     coding_category_emacs_mule,
  595     /* All above are targets of code detection.  */
  596     coding_category_raw_text,
  597     coding_category_undecided,
  598     coding_category_max         /* Number of categories; used as array size.  */
  599   };
 600
  601 /* Flag bits used in detect_coding_XXXX: bit N stands for category N.  */
  602 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
  603 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
  604 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
  605 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
  606 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
  607 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
  608 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
  609 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
  610 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
  611 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
  612 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
  613 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
  614 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
  615 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
  616 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
  617 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
  618 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
  619 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
  620 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
  621 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 622
  623 /* This value is returned if detect_coding_mask () finds nothing other
  624    than ASCII characters.  It ORs every single-category mask but RAW_TEXT.  */
  625 #define CATEGORY_MASK_ANY               \
  626   (CATEGORY_MASK_ISO_7                  \
  627    | CATEGORY_MASK_ISO_7_TIGHT          \
  628    | CATEGORY_MASK_ISO_8_1              \
  629    | CATEGORY_MASK_ISO_8_2              \
  630    | CATEGORY_MASK_ISO_7_ELSE           \
  631    | CATEGORY_MASK_ISO_8_ELSE           \
  632    | CATEGORY_MASK_UTF_8_AUTO           \
  633    | CATEGORY_MASK_UTF_8_NOSIG          \
  634    | CATEGORY_MASK_UTF_8_SIG            \
  635    | CATEGORY_MASK_UTF_16_AUTO          \
  636    | CATEGORY_MASK_UTF_16_BE            \
  637    | CATEGORY_MASK_UTF_16_LE            \
  638    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
  639    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
  640    | CATEGORY_MASK_CHARSET              \
  641    | CATEGORY_MASK_SJIS                 \
  642    | CATEGORY_MASK_BIG5                 \
  643    | CATEGORY_MASK_CCL                  \
  644    | CATEGORY_MASK_EMACS_MULE)
 645
  646 /* Unions of related category masks.  */
  647 #define CATEGORY_MASK_ISO_7BIT \
  648   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
  649
  650 #define CATEGORY_MASK_ISO_8BIT \
  651   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
  652
  653 #define CATEGORY_MASK_ISO_ELSE \
  654   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
  655
  656 #define CATEGORY_MASK_ISO_ESCAPE        \
  657   (CATEGORY_MASK_ISO_7                  \
  658    | CATEGORY_MASK_ISO_7_TIGHT          \
  659    | CATEGORY_MASK_ISO_7_ELSE           \
  660    | CATEGORY_MASK_ISO_8_ELSE)
  661
  662 #define CATEGORY_MASK_ISO       \
  663   (  CATEGORY_MASK_ISO_7BIT     \
  664      | CATEGORY_MASK_ISO_8BIT   \
  665      | CATEGORY_MASK_ISO_ELSE)
  666
  667 #define CATEGORY_MASK_UTF_16            \
  668   (CATEGORY_MASK_UTF_16_AUTO            \
  669    | CATEGORY_MASK_UTF_16_BE            \
  670    | CATEGORY_MASK_UTF_16_LE            \
  671    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
  672    | CATEGORY_MASK_UTF_16_LE_NOSIG)
  673
  674 #define CATEGORY_MASK_UTF_8     \
  675   (CATEGORY_MASK_UTF_8_AUTO     \
  676    | CATEGORY_MASK_UTF_8_NOSIG  \
  677    | CATEGORY_MASK_UTF_8_SIG)
 678
  679 /* List of symbols `coding-category-xxx' ordered by priority.  This
  680    variable is exposed to Emacs Lisp.  */
  681 static Lisp_Object Vcoding_category_list;
  682
  683 /* Table of coding categories (Lisp symbols).  This variable is for
  684    internal use only.  */
  685 static Lisp_Object Vcoding_category_table;
  686
  687 /* Table of coding categories ordered by priority.  */
  688 static enum coding_category coding_priorities[coding_category_max];
  689
  690 /* Nth element is a coding context for the coding system bound to the
  691    Nth coding category.  */
  692 static struct coding_system coding_categories[coding_category_max];
 693
 694 /*** Commonly used macros and functions ***/
  695 /* Fallback min/max; note that an argument may be evaluated twice.  */
  696 #ifndef min
  697 #define min(a, b) ((a) < (b) ? (a) : (b))
  698 #endif
  699 #ifndef max
  700 #define max(a, b) ((a) > (b) ? (a) : (b))
  701 #endif
  702 /* Set ATTRS from CODING's id, then CHARSET_LIST from ATTRS.  */
  703 #define CODING_GET_INFO(coding, attrs, charset_list)    \
  704   do {                                                  \
  705     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
  706     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
  707   } while (0)
 708
 709
  710 /* Safely get one byte from SRC (which ends at SRC_END), store it in C,
  711    and increment consumed_chars.  At end of source, jump to
  712    `no_more_source', first recording CODING_RESULT_INSUFFICIENT_SRC if
  713    src_base < src.  If multibytep is nonzero and C has bit 0x80 set: a
  714    0xC0/0xC1 lead byte is merged with the next byte into one value; any
  715    other sequence is read by string_char, C becomes the negated result,
  716    and CODING_RESULT_INVALID_SRC is recorded.  The caller must provide:
  717         src, src_end, src_base, multibytep, consumed_chars, coding */
  718 #define ONE_MORE_BYTE(c)                                \
  719   do {                                                  \
  720     if (src == src_end)                                 \
  721       {                                                 \
  722         if (src_base < src)                             \
  723           record_conversion_result                      \
  724             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
  725         goto no_more_source;                            \
  726       }                                                 \
  727     c = *src++;                                         \
  728     if (multibytep && (c & 0x80))                       \
  729       {                                                 \
  730         if ((c & 0xFE) == 0xC0)                         \
  731           c = ((c & 1) << 6) | *src++;                  \
  732         else                                            \
  733           {                                             \
  734             src--;                                      \
  735             c = - string_char (src, &src, NULL);        \
  736             record_conversion_result                    \
  737               (coding, CODING_RESULT_INVALID_SRC);      \
  738           }                                             \
  739       }                                                 \
  740     consumed_chars++;                                   \
  741   } while (0)
  742 /* Like ONE_MORE_BYTE, but without the end-of-source check and jump, so
  743    the caller must already know that another byte is available.  */
  744 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
  745   do {                                                  \
  746     c = *src++;                                         \
  747     if (multibytep && (c & 0x80))                       \
  748       {                                                 \
  749         if ((c & 0xFE) == 0xC0)                         \
  750           c = ((c & 1) << 6) | *src++;                  \
  751         else                                            \
  752           {                                             \
  753             src--;                                      \
  754             c = - string_char (src, &src, NULL);        \
  755             record_conversion_result                    \
  756               (coding, CODING_RESULT_INVALID_SRC);      \
  757           }                                             \
  758       }                                                 \
  759     consumed_chars++;                                   \
  760   } while (0)
 761
 762
  763 /* Store a byte C in the place pointed by DST and increment DST to the
  764    next free point, and increment PRODUCED_CHARS.  The caller should
  765    assure that C is 0..127, and declare and set the variables `dst'
  766    and `produced_chars' appropriately in advance.
  767 */
  768
  769
  770 #define EMIT_ONE_ASCII_BYTE(c)  \
  771   do {                          \
  772     produced_chars++;           \
  773     *dst++ = (c);               \
  774   } while (0)
 775
 776
 /* Like EMIT_ONE_ASCII_BYTE, but store two bytes, C1 then C2, and
    count two produced characters.  */

 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
   do {                                  \
     produced_chars += 2;                \
     *dst++ = (c1);                      \
     *dst++ = (c2);                      \
   } while (0)
 784
 785
 /* Append the byte C at DST, advance DST to the next free position,
    and count one more produced character in PRODUCED_CHARS.  When
    MULTIBYTEP is zero the byte is stored as is.  Otherwise it is
    stored in multibyte form: a value of 0x80 or above is first mapped
    through BYTE8_TO_CHAR and the result is written with
    CHAR_STRING_ADVANCE.  The caller must have declared and set up the
    variables `dst', `multibytep' and `produced_chars' in advance.  */

 #define EMIT_ONE_BYTE(c)                        \
   do {                                          \
     produced_chars++;                           \
     if (! multibytep)                           \
       *dst++ = (c);                             \
     else                                        \
       {                                         \
         int ch = (c);                           \
                                                 \
         if (ch >= 0x80)                         \
           ch = BYTE8_TO_CHAR (ch);              \
         CHAR_STRING_ADVANCE (ch, dst);          \
       }                                         \
   } while (0)
 805
 806
 /* Like EMIT_ONE_BYTE, but emit two bytes, C1 then C2, and count two
    produced characters.  In the multibyte case each byte is converted
    independently (BYTE8_TO_CHAR for values of 0x80 or above, then
    CHAR_STRING_ADVANCE).  */

 #define EMIT_TWO_BYTES(c1, c2)                  \
   do {                                          \
     produced_chars += 2;                        \
     if (! multibytep)                           \
       {                                         \
         *dst++ = (c1);                          \
         *dst++ = (c2);                          \
       }                                         \
     else                                        \
       {                                         \
         int ch = (c1);                          \
                                                 \
         if (ch >= 0x80)                         \
           ch = BYTE8_TO_CHAR (ch);              \
         CHAR_STRING_ADVANCE (ch, dst);          \
                                                 \
         ch = (c2);                              \
         if (ch >= 0x80)                         \
           ch = BYTE8_TO_CHAR (ch);              \
         CHAR_STRING_ADVANCE (ch, dst);          \
       }                                         \
   } while (0)
 831
 832
 /* Emit the three bytes C1, C2 and C3 in that order, with the same
    unibyte/multibyte handling as EMIT_ONE_BYTE.  */

 #define EMIT_THREE_BYTES(c1, c2, c3)    \
   do {                                  \
     EMIT_TWO_BYTES (c1, c2);            \
     EMIT_ONE_BYTE (c3);                 \
   } while (0)
 838
 839
 /* Emit the four bytes C1, C2, C3 and C4 in that order, with the same
    unibyte/multibyte handling as EMIT_ONE_BYTE.  */

 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
   do {                                          \
     EMIT_ONE_BYTE (c1);                         \
     EMIT_TWO_BYTES (c2, c3);                    \
     EMIT_ONE_BYTE (c4);                         \
   } while (0)
 845
 846
 847 /* Prototypes for static functions.  */
 848 static void record_conversion_result P_ ((struct coding_system *coding,
 849                                           enum coding_result_code result));
 850 static int detect_coding_utf_8 P_ ((struct coding_system *,
 851                                     struct coding_detection_info *info));
 852 static void decode_coding_utf_8 P_ ((struct coding_system *));
 853 static int encode_coding_utf_8 P_ ((struct coding_system *));
 854
 855 static int detect_coding_utf_16 P_ ((struct coding_system *,
 856                                      struct coding_detection_info *info));
 857 static void decode_coding_utf_16 P_ ((struct coding_system *));
 858 static int encode_coding_utf_16 P_ ((struct coding_system *));
 859
 860 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 861                                        struct coding_detection_info *info));
 862 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 863 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 864
 865 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 866                                          struct coding_detection_info *info));
 867 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 868 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 869
 870 static int detect_coding_sjis P_ ((struct coding_system *,
 871                                    struct coding_detection_info *info));
 872 static void decode_coding_sjis P_ ((struct coding_system *));
 873 static int encode_coding_sjis P_ ((struct coding_system *));
 874
 875 static int detect_coding_big5 P_ ((struct coding_system *,
 876                                    struct coding_detection_info *info));
 877 static void decode_coding_big5 P_ ((struct coding_system *));
 878 static int encode_coding_big5 P_ ((struct coding_system *));
 879
 880 static int detect_coding_ccl P_ ((struct coding_system *,
 881                                   struct coding_detection_info *info));
 882 static void decode_coding_ccl P_ ((struct coding_system *));
 883 static int encode_coding_ccl P_ ((struct coding_system *));
 884
 885 static void decode_coding_raw_text P_ ((struct coding_system *));
 886 static int encode_coding_raw_text P_ ((struct coding_system *));
 887
 888 static void coding_set_source P_ ((struct coding_system *));
 889 static void coding_set_destination P_ ((struct coding_system *));
 890 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 891 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 892                                             EMACS_INT, EMACS_INT));
 893 static unsigned char *alloc_destination P_ ((struct coding_system *,
 894                                              EMACS_INT, unsigned char *));
 895 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 896 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 897                                                      int *, int *,
 898                                                      unsigned char *));
 899 static int detect_eol P_ ((const unsigned char *,
 900                            EMACS_INT, enum coding_category));
 901 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 902 static void decode_eol P_ ((struct coding_system *));
 903 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 904 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
 905                                         int, int *, int *));
 906 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 907 static INLINE void produce_composition P_ ((struct coding_system *, int *,
 908                                             EMACS_INT));
 909 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 910                                         EMACS_INT));
 911 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 912 static int decode_coding P_ ((struct coding_system *));
 913 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 914                                                       struct coding_system *,
 915                                                       int *, EMACS_INT *));
 916 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 917                                                   struct coding_system *,
 918                                                   int *, EMACS_INT *));
 919 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 920 static int encode_coding P_ ((struct coding_system *));
 921 static Lisp_Object make_conversion_work_buffer P_ ((int));
 922 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 923 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 924 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 925
 926 static void
 927 record_conversion_result (struct coding_system *coding,
 928                           enum coding_result_code result)
 929 {
 930   coding->result = result;
 931   switch (result)
 932     {
 933     case CODING_RESULT_INSUFFICIENT_SRC:
 934       Vlast_code_conversion_error = Qinsufficient_source;
 935       break;
 936     case CODING_RESULT_INCONSISTENT_EOL:
 937       Vlast_code_conversion_error = Qinconsistent_eol;
 938       break;
 939     case CODING_RESULT_INVALID_SRC:
 940       Vlast_code_conversion_error = Qinvalid_source;
 941       break;
 942     case CODING_RESULT_INTERRUPT:
 943       Vlast_code_conversion_error = Qinterrupted;
 944       break;
 945     case CODING_RESULT_INSUFFICIENT_MEM:
 946       Vlast_code_conversion_error = Qinsufficient_memory;
 947       break;
 948     default:
 949       Vlast_code_conversion_error = intern ("Unknown error");
 950     }
 951 }
 952
 /* Decode CODE of CHARSET into the character C with DECODE_CHAR.
    The flag `charset_map_loaded' is cleared beforehand and tested
    afterwards; when it got set (presumably because DECODE_CHAR had to
    load a charset map, which may relocate the source text), the
    source address of CODING is recomputed with coding_set_source and
    the caller's pointers SRC, SRC_BASE and SRC_END are shifted by the
    distance the source moved.  */

 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
   do {                                                                       \
     charset_map_loaded = 0;                                                  \
     (c) = DECODE_CHAR (charset, code);                                       \
     if (charset_map_loaded != 0)                                             \
       {                                                                      \
         const unsigned char *old_source = (coding)->source;                  \
         EMACS_INT moved_by;                                                  \
                                                                              \
         coding_set_source (coding);                                          \
         moved_by = (coding)->source - old_source;                            \
         (src) += moved_by;                                                   \
         (src_base) += moved_by;                                              \
         (src_end) += moved_by;                                               \
       }                                                                      \
   } while (0)
 969
 970
 /* Unless strictly more than BYTES bytes of room remain between DST
    and DST_END, enlarge coding->destination with alloc_destination
    and update DST and DST_END accordingly.  The amount requested is
    BYTES plus the number of elements still pending between CHARBUF
    and CHARBUF_END.  We don't have to take care of coding->source,
    which may be relocated; that is handled by calling
    coding_set_source in encode_coding.  */

 #define ASSURE_DESTINATION(bytes)                               \
   do {                                                          \
     if (dst + (bytes) >= dst_end)                               \
       {                                                         \
         int room_wanted = charbuf_end - charbuf + (bytes);      \
                                                                 \
         dst = alloc_destination (coding, room_wanted, dst);     \
         dst_end = coding->destination + coding->dst_bytes;      \
       }                                                         \
   } while (0)
 986
 987
 /* Store the multibyte form of the character C at P, and advance P
    to the end of the multibyte form.  This is like
    CHAR_STRING_ADVANCE but it never calls MAYBE_UNIFY_CHAR.

    The form is 1 to 5 bytes long depending on which of
    MAX_1_BYTE_CHAR ... MAX_5_BYTE_CHAR bounds C; anything above
    MAX_5_BYTE_CHAR is written through BYTE8_STRING.  Both C and P are
    evaluated several times, so neither may have side effects; every
    use of C is parenthesized so that an expression argument such as
    `a | b' is encoded correctly.  */

 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
   do {                                                  \
     if ((c) <= MAX_1_BYTE_CHAR)                         \
       *(p)++ = (c);                                     \
     else if ((c) <= MAX_2_BYTE_CHAR)                    \
       *(p)++ = (0xC0 | ((c) >> 6)),                     \
         *(p)++ = (0x80 | ((c) & 0x3F));                 \
     else if ((c) <= MAX_3_BYTE_CHAR)                    \
       *(p)++ = (0xE0 | ((c) >> 12)),                    \
         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
         *(p)++ = (0x80 | ((c) & 0x3F));                 \
     else if ((c) <= MAX_4_BYTE_CHAR)                    \
       *(p)++ = (0xF0 | ((c) >> 18)),                    \
         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
         *(p)++ = (0x80 | ((c) & 0x3F));                 \
     else if ((c) <= MAX_5_BYTE_CHAR)                    \
       *(p)++ = 0xF8,                                    \
         *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
         *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
         *(p)++ = (0x80 | ((c) & 0x3F));                 \
     else                                                \
       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
   } while (0)
1017
1018
/* Return the character code of character whose multibyte form is at
   P, and advance P to the end of the multibyte form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length of the form is chosen from the lead byte (P)[0]:
     bit 0x80 clear -> 1 byte, returned as is;
     bit 0x20 clear -> 2 bytes; when the lead byte is below 0xC2 the
                       value is additionally ORed with 0x3FFF80;
     bit 0x10 clear -> 3 bytes;
     bit 0x08 clear -> 4 bytes;
     otherwise      -> 5 bytes, of which only the last four
                       contribute bits to the result.
   No validation of the trailing bytes is done.  This is a single
   expression that evaluates P many times, so P must be an lvalue
   without side effects.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
  (!((p)[0] & 0x80)                                             \
   ? *(p)++                                                     \
   : ! ((p)[0] & 0x20)                                          \
   ? ((p) += 2,                                                 \
      ((((p)[-2] & 0x1F) << 6)                                  \
       | ((p)[-1] & 0x3F)                                       \
       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
   : ! ((p)[0] & 0x10)                                          \
   ? ((p) += 3,                                                 \
      ((((p)[-3] & 0x0F) << 12)                                 \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ! ((p)[0] & 0x08)                                          \
   ? ((p) += 4,                                                 \
      ((((p)[-4] & 0xF) << 18)                                  \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F)))                                     \
   : ((p) += 5,                                                 \
      ((((p)[-4] & 0x3F) << 18)                                 \
       | (((p)[-3] & 0x3F) << 12)                               \
       | (((p)[-2] & 0x3F) << 6)                                \
       | ((p)[-1] & 0x3F))))
1047
1048
1049 static void
1050 coding_set_source (coding)
1051      struct coding_system *coding;
1052 {
1053   if (BUFFERP (coding->src_object))
1054     {
1055       struct buffer *buf = XBUFFER (coding->src_object);
1056
1057       if (coding->src_pos < 0)
1058         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1059       else
1060         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1061     }
1062   else if (STRINGP (coding->src_object))
1063     {
1064       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1065     }
1066   else
1067     /* Otherwise, the source is C string and is never relocated
1068        automatically.  Thus we don't have to update anything.  */
1069     ;
1070 }
1071
1072 static void
1073 coding_set_destination (coding)
1074      struct coding_system *coding;
1075 {
1076   if (BUFFERP (coding->dst_object))
1077     {
1078       if (coding->src_pos < 0)
1079         {
1080           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1081           coding->dst_bytes = (GAP_END_ADDR
1082                                - (coding->src_bytes - coding->consumed)
1083                                - coding->destination);
1084         }
1085       else
1086         {
1087           /* We are sure that coding->dst_pos_byte is before the gap
1088              of the buffer. */
1089           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1090                                  + coding->dst_pos_byte - BEG_BYTE);
1091           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1092                                - coding->destination);
1093         }
1094     }
1095   else
1096     /* Otherwise, the destination is C string and is never relocated
1097        automatically.  Thus we don't have to update anything.  */
1098     ;
1099 }
1100
1101
1102 static void
1103 coding_alloc_by_realloc (coding, bytes)
1104      struct coding_system *coding;
1105      EMACS_INT bytes;
1106 {
1107   coding->destination = (unsigned char *) xrealloc (coding->destination,
1108                                                     coding->dst_bytes + bytes);
1109   coding->dst_bytes += bytes;
1110 }
1111
1112 static void
1113 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1114      struct coding_system *coding;
1115      EMACS_INT gap_head_used, bytes;
1116 {
1117   if (EQ (coding->src_object, coding->dst_object))
1118     {
1119       /* The gap may contain the produced data at the head and not-yet
1120          consumed data at the tail.  To preserve those data, we at
1121          first make the gap size to zero, then increase the gap
1122          size.  */
1123       EMACS_INT add = GAP_SIZE;
1124
1125       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1126       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1127       make_gap (bytes);
1128       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1129       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1130     }
1131   else
1132     {
1133       Lisp_Object this_buffer;
1134
1135       this_buffer = Fcurrent_buffer ();
1136       set_buffer_internal (XBUFFER (coding->dst_object));
1137       make_gap (bytes);
1138       set_buffer_internal (XBUFFER (this_buffer));
1139     }
1140 }
1141
1142
1143 static unsigned char *
1144 alloc_destination (coding, nbytes, dst)
1145      struct coding_system *coding;
1146      EMACS_INT nbytes;
1147      unsigned char *dst;
1148 {
1149   EMACS_INT offset = dst - coding->destination;
1150
1151   if (BUFFERP (coding->dst_object))
1152     {
1153       struct buffer *buf = XBUFFER (coding->dst_object);
1154
1155       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1156     }
1157   else
1158     coding_alloc_by_realloc (coding, nbytes);
1159   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1160   coding_set_destination (coding);
1161   dst = coding->destination + offset;
1162   return dst;
1163 }
1164
1165 /** Macros for annotations.  */
1166
/* Maximum length of annotation data (sum of annotations for
   composition and charset).  The two 4s equal the LEN that
   ADD_COMPOSITION_DATA and ADD_CHARSET_DATA each pass to
   ADD_ANNOTATION_DATA; the middle term presumably allows for up to
   MAX_COMPOSITION_COMPONENTS characters with a rule between each
   pair -- TODO confirm against the composition code.  */
#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1170
/* Annotation data is stored in the array coding->charbuf in this
   format:
     [ -LENGTH ANNOTATION_MASK NCHARS ... ]
   LENGTH is the number of elements in the annotation.
   ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
   NCHARS is the number of characters in the text annotated.

   The format of the following elements depends on ANNOTATION_MASK.

   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
     ... METHOD [ COMPOSITION-COMPONENTS ... ]
   METHOD is one of enum composition_method.
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.

   In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
   follows.  */

/* Store the three-element annotation header -LEN, MASK, NCHARS at
   BUF, advance BUF past it, and flag `coding' as annotated.  The
   caller must have the variable `coding' in scope.  The expansion
   deliberately does not end in a semicolon: the caller supplies it,
   which keeps `if (...) ADD_ANNOTATION_DATA (...); else ...' valid.  */

#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1197
/* Start a composition annotation at BUF: store the annotation header
   (length 4, CODING_ANNOTATE_COMPOSITION_MASK, NCHARS) followed by
   METHOD, and advance BUF past those four elements.  BUF is
   parenthesized everywhere so that an lvalue expression such as
   `*bufp' is advanced itself rather than the pointer it is reached
   through.  */

#define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (method);                                                    \
  } while (0)
1203
1204
/* Store a complete charset annotation at BUF: the annotation header
   (length 4, CODING_ANNOTATE_CHARSET_MASK, NCHARS) followed by the
   charset ID, and advance BUF past those four elements.  BUF is
   parenthesized everywhere so that an lvalue expression such as
   `*bufp' is advanced itself rather than the pointer it is reached
   through.  */

#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1210
1211 \f
1212 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1213
1214
1215
1216 \f
1217 /*** 3. UTF-8 ***/
1218
1219 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1220    Check if a text is encoded in UTF-8.  If it is, return 1, else
1221    return 0.  */
1222
/* Classify one octet C of a UTF-8 sequence by its high bits.  Note
   that UTF_8_1_OCTET_P is a plain `< 0x80' comparison, so it is also
   true for negative values; callers test for C < 0 separately.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* Trailing octet: 10xxxxxx.  */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
/* Leading octet of a 2-octet sequence: 110xxxxx.  */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* Leading octet of a 3-octet sequence: 1110xxxx.  */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* Leading octet of a 4-octet sequence: 11110xxx.  */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* Leading octet of a 5-octet sequence: 111110xx.  */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)

/* The byte order mark U+FEFF, and the three octets of its UTF-8
   encoding.  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1234
1235 static int
1236 detect_coding_utf_8 (coding, detect_info)
1237      struct coding_system *coding;
1238      struct coding_detection_info *detect_info;
1239 {
1240   const unsigned char *src = coding->source, *src_base;
1241   const unsigned char *src_end = coding->source + coding->src_bytes;
1242   int multibytep = coding->src_multibyte;
1243   int consumed_chars = 0;
1244   int bom_found = 0;
1245   int found = 0;
1246
1247   detect_info->checked |= CATEGORY_MASK_UTF_8;
1248   /* A coding system of this category is always ASCII compatible.  */
1249   src += coding->head_ascii;
1250
1251   while (1)
1252     {
1253       int c, c1, c2, c3, c4;
1254
1255       src_base = src;
1256       ONE_MORE_BYTE (c);
1257       if (c < 0 || UTF_8_1_OCTET_P (c))
1258         continue;
1259       ONE_MORE_BYTE (c1);
1260       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1261         break;
1262       if (UTF_8_2_OCTET_LEADING_P (c))
1263         {
1264           found = 1;
1265           continue;
1266         }
1267       ONE_MORE_BYTE (c2);
1268       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1269         break;
1270       if (UTF_8_3_OCTET_LEADING_P (c))
1271         {
1272           found = 1;
1273           if (src_base == coding->source
1274               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1275             bom_found = 1;
1276           continue;
1277         }
1278       ONE_MORE_BYTE (c3);
1279       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1280         break;
1281       if (UTF_8_4_OCTET_LEADING_P (c))
1282         {
1283           found = 1;
1284           continue;
1285         }
1286       ONE_MORE_BYTE (c4);
1287       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1288         break;
1289       if (UTF_8_5_OCTET_LEADING_P (c))
1290         {
1291           found = 1;
1292           continue;
1293         }
1294       break;
1295     }
1296   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1297   return 0;
1298
1299  no_more_source:
1300   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1301     {
1302       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1303       return 0;
1304     }
1305   if (bom_found)
1306     {
1307       /* The first character 0xFFFE doesn't necessarily mean a BOM.  */
1308       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1309     }
1310   else
1311     {
1312       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1313       detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1314     }
1315   return 1;
1316 }
1317
1318
1319 static void
1320 decode_coding_utf_8 (coding)
1321      struct coding_system *coding;
1322 {
1323   const unsigned char *src = coding->source + coding->consumed;
1324   const unsigned char *src_end = coding->source + coding->src_bytes;
1325   const unsigned char *src_base;
1326   int *charbuf = coding->charbuf + coding->charbuf_used;
1327   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1328   int consumed_chars = 0, consumed_chars_base;
1329   int multibytep = coding->src_multibyte;
1330   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1331   Lisp_Object attr, charset_list;
1332   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1333   int byte_after_cr = -1;
1334
1335   CODING_GET_INFO (coding, attr, charset_list);
1336
1337   if (bom != utf_without_bom)
1338     {
1339       int c1, c2, c3;
1340
1341       src_base = src;
1342       ONE_MORE_BYTE (c1);
1343       if (! UTF_8_3_OCTET_LEADING_P (c1))
1344         src = src_base;
1345       else
1346         {
1347           ONE_MORE_BYTE (c2);
1348           if (! UTF_8_EXTRA_OCTET_P (c2))
1349             src = src_base;
1350           else
1351             {
1352               ONE_MORE_BYTE (c3);
1353               if (! UTF_8_EXTRA_OCTET_P (c3))
1354                 src = src_base;
1355               else
1356                 {
1357                   if ((c1 != UTF_8_BOM_1)
1358                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1359                     src = src_base;
1360                   else
1361                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1362                 }
1363             }
1364         }
1365     }
1366   CODING_UTF_8_BOM (coding) = utf_without_bom;
1367
1368
1369
1370   while (1)
1371     {
1372       int c, c1, c2, c3, c4, c5;
1373
1374       src_base = src;
1375       consumed_chars_base = consumed_chars;
1376
1377       if (charbuf >= charbuf_end)
1378         break;
1379
1380       if (byte_after_cr >= 0)
1381         c1 = byte_after_cr, byte_after_cr = -1;
1382       else
1383         ONE_MORE_BYTE (c1);
1384       if (c1 < 0)
1385         {
1386           c = - c1;
1387         }
1388       else if (UTF_8_1_OCTET_P(c1))
1389         {
1390           if (eol_crlf && c1 == '\r')
1391             ONE_MORE_BYTE (byte_after_cr);
1392           c = c1;
1393         }
1394       else
1395         {
1396           ONE_MORE_BYTE (c2);
1397           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1398             goto invalid_code;
1399           if (UTF_8_2_OCTET_LEADING_P (c1))
1400             {
1401               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1402               /* Reject overlong sequences here and below.  Encoders
1403                  producing them are incorrect, they can be misleading,
1404                  and they mess up read/write invariance.  */
1405               if (c < 128)
1406                 goto invalid_code;
1407             }
1408           else
1409             {
1410               ONE_MORE_BYTE (c3);
1411               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1412                 goto invalid_code;
1413               if (UTF_8_3_OCTET_LEADING_P (c1))
1414                 {
1415                   c = (((c1 & 0xF) << 12)
1416                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1417                   if (c < 0x800
1418                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1419                     goto invalid_code;
1420                 }
1421               else
1422                 {
1423                   ONE_MORE_BYTE (c4);
1424                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1425                     goto invalid_code;
1426                   if (UTF_8_4_OCTET_LEADING_P (c1))
1427                     {
1428                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1429                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1430                     if (c < 0x10000)
1431                       goto invalid_code;
1432                     }
1433                   else
1434                     {
1435                       ONE_MORE_BYTE (c5);
1436                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1437                         goto invalid_code;
1438                       if (UTF_8_5_OCTET_LEADING_P (c1))
1439                         {
1440                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1441                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1442                                | (c5 & 0x3F));
1443                           if ((c > MAX_CHAR) || (c < 0x200000))
1444                             goto invalid_code;
1445                         }
1446                       else
1447                         goto invalid_code;
1448                     }
1449                 }
1450             }
1451         }
1452
1453       *charbuf++ = c;
1454       continue;
1455
1456     invalid_code:
1457       src = src_base;
1458       consumed_chars = consumed_chars_base;
1459       ONE_MORE_BYTE (c);
1460       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1461       coding->errors++;
1462     }
1463
1464  no_more_source:
1465   coding->consumed_char += consumed_chars_base;
1466   coding->consumed = src_base - coding->source;
1467   coding->charbuf_used = charbuf - coding->charbuf;
1468 }
1469
1470
1471 static int
1472 encode_coding_utf_8 (coding)
1473      struct coding_system *coding;
1474 {
1475   int multibytep = coding->dst_multibyte;
1476   int *charbuf = coding->charbuf;
1477   int *charbuf_end = charbuf + coding->charbuf_used;
1478   unsigned char *dst = coding->destination + coding->produced;
1479   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1480   int produced_chars = 0;
1481   int c;
1482
1483   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1484     {
1485       ASSURE_DESTINATION (3);
1486       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1487       CODING_UTF_8_BOM (coding) = utf_without_bom;
1488     }
1489
1490   if (multibytep)
1491     {
1492       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1493
1494       while (charbuf < charbuf_end)
1495         {
1496           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1497
1498           ASSURE_DESTINATION (safe_room);
1499           c = *charbuf++;
1500           if (CHAR_BYTE8_P (c))
1501             {
1502               c = CHAR_TO_BYTE8 (c);
1503               EMIT_ONE_BYTE (c);
1504             }
1505           else
1506             {
1507               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1508               for (p = str; p < pend; p++)
1509                 EMIT_ONE_BYTE (*p);
1510             }
1511         }
1512     }
1513   else
1514     {
1515       int safe_room = MAX_MULTIBYTE_LENGTH;
1516
1517       while (charbuf < charbuf_end)
1518         {
1519           ASSURE_DESTINATION (safe_room);
1520           c = *charbuf++;
1521           if (CHAR_BYTE8_P (c))
1522             *dst++ = CHAR_TO_BYTE8 (c);
1523           else
1524             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1525           produced_chars++;
1526         }
1527     }
1528   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1529   coding->produced_char += produced_chars;
1530   coding->produced = dst - coding->destination;
1531   return 0;
1532 }
1533
1534
1535 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1536    Check if a text is encoded in one of UTF-16 based coding systems.
1537    If it is, return 1, else return 0.  */
1538
/* Nonzero if the 16-bit code unit VAL is a high (leading) surrogate,
   i.e. in the range 0xD800..0xDBFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit code unit VAL is a low (trailing) surrogate,
   i.e. in the range 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)

/* Nonzero if VAL is 0xFFFE, 0xFFFF, or a low surrogate.  VAL is
   evaluated up to three times.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF)         \
   || UTF_16_LOW_SURROGATE_P (val))
1549
1550
1551 static int
1552 detect_coding_utf_16 (coding, detect_info)
1553      struct coding_system *coding;
1554      struct coding_detection_info *detect_info;
1555 {
1556   const unsigned char *src = coding->source, *src_base = src;
1557   const unsigned char *src_end = coding->source + coding->src_bytes;
1558   int multibytep = coding->src_multibyte;
1559   int consumed_chars = 0;
1560   int c1, c2;
1561
1562   detect_info->checked |= CATEGORY_MASK_UTF_16;
1563   if (coding->mode & CODING_MODE_LAST_BLOCK
1564       && (coding->src_chars & 1))
1565     {
1566       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1567       return 0;
1568     }
1569
1570   ONE_MORE_BYTE (c1);
1571   ONE_MORE_BYTE (c2);
1572   if ((c1 == 0xFF) && (c2 == 0xFE))
1573     {
1574       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1575                              | CATEGORY_MASK_UTF_16_AUTO);
1576       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1577                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1578                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1579     }
1580   else if ((c1 == 0xFE) && (c2 == 0xFF))
1581     {
1582       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1583                              | CATEGORY_MASK_UTF_16_AUTO);
1584       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1585                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1586                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1587     }
1588   else
1589     {
1590       /* We check the dispersion of Eth and Oth bytes where E is even and
1591          O is odd.  If both are high, we assume binary data.*/
1592       unsigned char e[256], o[256];
1593       unsigned e_num = 1, o_num = 1;
1594
1595       memset (e, 0, 256);
1596       memset (o, 0, 256);
1597       e[c1] = 1;
1598       o[c2] = 1;
1599
1600       detect_info->rejected
1601         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1602
1603       while (1)
1604         {
1605           ONE_MORE_BYTE (c1);
1606           ONE_MORE_BYTE (c2);
1607           if (! e[c1])
1608             {
1609               e[c1] = 1;
1610               e_num++;
1611               if (e_num >= 128)
1612                 break;
1613             }
1614           if (! o[c2])
1615             {
1616               o[c1] = 1;
1617               o_num++;
1618               if (o_num >= 128)
1619                 break;
1620             }
1621         }
1622       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1623       return 0;
1624     }
1625
1626  no_more_source:
1627   return 1;
1628 }
1629
1630 static void
1631 decode_coding_utf_16 (coding)
1632      struct coding_system *coding;
1633 {
1634   const unsigned char *src = coding->source + coding->consumed;
1635   const unsigned char *src_end = coding->source + coding->src_bytes;
1636   const unsigned char *src_base;
1637   int *charbuf = coding->charbuf + coding->charbuf_used;
1638   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1639   int consumed_chars = 0, consumed_chars_base;
1640   int multibytep = coding->src_multibyte;
1641   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1642   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1643   int surrogate = CODING_UTF_16_SURROGATE (coding);
1644   Lisp_Object attr, charset_list;
1645   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1646   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1647
1648   CODING_GET_INFO (coding, attr, charset_list);
1649
1650   if (bom == utf_with_bom)
1651     {
1652       int c, c1, c2;
1653
1654       src_base = src;
1655       ONE_MORE_BYTE (c1);
1656       ONE_MORE_BYTE (c2);
1657       c = (c1 << 8) | c2;
1658
1659       if (endian == utf_16_big_endian
1660           ? c != 0xFEFF : c != 0xFFFE)
1661         {
1662           /* The first two bytes are not BOM.  Treat them as bytes
1663              for a normal character.  */
1664           src = src_base;
1665           coding->errors++;
1666         }
1667       CODING_UTF_16_BOM (coding) = utf_without_bom;
1668     }
1669   else if (bom == utf_detect_bom)
1670     {
1671       /* We have already tried to detect BOM and failed in
1672          detect_coding.  */
1673       CODING_UTF_16_BOM (coding) = utf_without_bom;
1674     }
1675
1676   while (1)
1677     {
1678       int c, c1, c2;
1679
1680       src_base = src;
1681       consumed_chars_base = consumed_chars;
1682
1683       if (charbuf + 2 >= charbuf_end)
1684         break;
1685
1686       if (byte_after_cr1 >= 0)
1687         c1 = byte_after_cr1, byte_after_cr1 = -1;
1688       else
1689         ONE_MORE_BYTE (c1);
1690       if (c1 < 0)
1691         {
1692           *charbuf++ = -c1;
1693           continue;
1694         }
1695       if (byte_after_cr2 >= 0)
1696         c2 = byte_after_cr2, byte_after_cr2 = -1;
1697       else
1698         ONE_MORE_BYTE (c2);
1699       if (c2 < 0)
1700         {
1701           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1702           *charbuf++ = -c2;
1703           continue;
1704         }
1705       c = (endian == utf_16_big_endian
1706            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1707
1708       if (surrogate)
1709         {
1710           if (! UTF_16_LOW_SURROGATE_P (c))
1711             {
1712               if (endian == utf_16_big_endian)
1713                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1714               else
1715                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1716               *charbuf++ = c1;
1717               *charbuf++ = c2;
1718               coding->errors++;
1719               if (UTF_16_HIGH_SURROGATE_P (c))
1720                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1721               else
1722                 *charbuf++ = c;
1723             }
1724           else
1725             {
1726               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1727               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1728               *charbuf++ = 0x10000 + c;
1729             }
1730         }
1731       else
1732         {
1733           if (UTF_16_HIGH_SURROGATE_P (c))
1734             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1735           else
1736             {
1737               if (eol_crlf && c == '\r')
1738                 {
1739                   ONE_MORE_BYTE (byte_after_cr1);
1740                   ONE_MORE_BYTE (byte_after_cr2);
1741                 }
1742               *charbuf++ = c;
1743             }
1744         }
1745     }
1746
1747  no_more_source:
1748   coding->consumed_char += consumed_chars_base;
1749   coding->consumed = src_base - coding->source;
1750   coding->charbuf_used = charbuf - coding->charbuf;
1751 }
1752
1753 static int
1754 encode_coding_utf_16 (coding)
1755      struct coding_system *coding;
1756 {
1757   int multibytep = coding->dst_multibyte;
1758   int *charbuf = coding->charbuf;
1759   int *charbuf_end = charbuf + coding->charbuf_used;
1760   unsigned char *dst = coding->destination + coding->produced;
1761   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1762   int safe_room = 8;
1763   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1764   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1765   int produced_chars = 0;
1766   Lisp_Object attrs, charset_list;
1767   int c;
1768
1769   CODING_GET_INFO (coding, attrs, charset_list);
1770
1771   if (bom != utf_without_bom)
1772     {
1773       ASSURE_DESTINATION (safe_room);
1774       if (big_endian)
1775         EMIT_TWO_BYTES (0xFE, 0xFF);
1776       else
1777         EMIT_TWO_BYTES (0xFF, 0xFE);
1778       CODING_UTF_16_BOM (coding) = utf_without_bom;
1779     }
1780
1781   while (charbuf < charbuf_end)
1782     {
1783       ASSURE_DESTINATION (safe_room);
1784       c = *charbuf++;
1785       if (c >= MAX_UNICODE_CHAR)
1786         c = coding->default_char;
1787
1788       if (c < 0x10000)
1789         {
1790           if (big_endian)
1791             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1792           else
1793             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1794         }
1795       else
1796         {
1797           int c1, c2;
1798
1799           c -= 0x10000;
1800           c1 = (c >> 10) + 0xD800;
1801           c2 = (c & 0x3FF) + 0xDC00;
1802           if (big_endian)
1803             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1804           else
1805             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1806         }
1807     }
1808   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1809   coding->produced = dst - coding->destination;
1810   coding->produced_char += produced_chars;
1811   return 0;
1812 }
1813
1814 \f
1815 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1816
1817 /* Emacs' internal format for representation of multiple character
1818    sets is a kind of multi-byte encoding, i.e. characters are
1819    represented by variable-length sequences of one-byte codes.
1820
1821    ASCII characters and control characters (e.g. `tab', `newline') are
1822    represented by one-byte sequences which are their ASCII codes, in
1823    the range 0x00 through 0x7F.
1824
1825    8-bit characters of the range 0x80..0x9F are represented by
1826    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1827    code + 0x20).
1828
1829    8-bit characters of the range 0xA0..0xFF are represented by
1830    one-byte sequences which are their 8-bit code.
1831
1832    The other characters are represented by a sequence of `base
1833    leading-code', optional `extended leading-code', and one or two
1834    `position-code's.  The length of the sequence is determined by the
1835    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1836    whereas extended leading-code and position-code take the range 0xA0
1837    through 0xFF.  See `charset.h' for more details about leading-code
1838    and position-code.
1839
1840    --- CODE RANGE of Emacs' internal format ---
1841    character set        range
1842    -------------        -----
1843    ascii                0x00..0x7F
1844    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1845    eight-bit-graphic    0xA0..0xBF
1846    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1847    ---------------------------------------------
1848
1849    As this is the internal character representation, the format is
1850    usually not used externally (i.e. in a file or in a data sent to a
1851    process).  But, it is possible to have a text externally in this
1852    format (i.e. by encoding by the coding system `emacs-mule').
1853
1854    In that case, a sequence of one-byte codes has a slightly different
1855    form.
1856
1857    At first, all characters in eight-bit-control are represented by
1858    one-byte sequences which are their 8-bit code.
1859
1860    Next, character composition data are represented by the byte
1861    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1862    where,
1863         METHOD is 0xF0 plus one of composition method (enum
1864         composition_method),
1865
1866         BYTES is 0xA0 plus a byte length of this composition data,
1867
1868         CHARS is 0x20 plus a number of characters composed by this
1869         data,
1870
1871         COMPONENTs are characters of multibyte form or composition
1872         rules encoded by two-byte of ASCII codes.
1873
1874    In addition, for backward compatibility, the following formats are
1875    also recognized as composition data on decoding.
1876
1877    0x80 MSEQ ...
1878    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1879
1880    Here,
1881         MSEQ is a multibyte form but in these special format:
1882           ASCII: 0xA0 ASCII_CODE+0x80,
1883           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1884         RULE is a one byte code of the range 0xA0..0xF0 that
1885         represents a composition rule.
1886   */
1887
/* Table indexed by a byte value.  The code in this file reads the
   entry for a leading byte as the total number of bytes (1 to 4) in
   the emacs-mule sequence which that byte starts.  The table is filled
   in elsewhere.  */
1888 char emacs_mule_bytes[256];
1889
1890 int
1891 emacs_mule_char (coding, src, nbytes, nchars, id)
1892      struct coding_system *coding;
1893      const unsigned char *src;
1894      int *nbytes, *nchars, *id;
1895 {
1896   const unsigned char *src_end = coding->source + coding->src_bytes;
1897   const unsigned char *src_base = src;
1898   int multibytep = coding->src_multibyte;
1899   struct charset *charset;
1900   unsigned code;
1901   int c;
1902   int consumed_chars = 0;
1903
1904   ONE_MORE_BYTE (c);
1905   if (c < 0)
1906     {
1907       c = -c;
1908       charset = emacs_mule_charset[0];
1909     }
1910   else
1911     {
1912       if (c >= 0xA0)
1913         {
1914           /* Old style component character of a composition.  */
1915           if (c == 0xA0)
1916             {
1917               ONE_MORE_BYTE (c);
1918               c -= 0x80;
1919             }
1920           else
1921             c -= 0x20;
1922         }
1923
1924       switch (emacs_mule_bytes[c])
1925         {
1926         case 2:
1927           if (! (charset = emacs_mule_charset[c]))
1928             goto invalid_code;
1929           ONE_MORE_BYTE (c);
1930           if (c < 0xA0)
1931             goto invalid_code;
1932           code = c & 0x7F;
1933           break;
1934
1935         case 3:
1936           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1937               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1938             {
1939               ONE_MORE_BYTE (c);
1940               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1941                 goto invalid_code;
1942               ONE_MORE_BYTE (c);
1943               if (c < 0xA0)
1944                 goto invalid_code;
1945               code = c & 0x7F;
1946             }
1947           else
1948             {
1949               if (! (charset = emacs_mule_charset[c]))
1950                 goto invalid_code;
1951               ONE_MORE_BYTE (c);
1952               if (c < 0xA0)
1953                 goto invalid_code;
1954               code = (c & 0x7F) << 8;
1955               ONE_MORE_BYTE (c);
1956               if (c < 0xA0)
1957                 goto invalid_code;
1958               code |= c & 0x7F;
1959             }
1960           break;
1961
1962         case 4:
1963           ONE_MORE_BYTE (c);
1964           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1965             goto invalid_code;
1966           ONE_MORE_BYTE (c);
1967           if (c < 0xA0)
1968             goto invalid_code;
1969           code = (c & 0x7F) << 8;
1970           ONE_MORE_BYTE (c);
1971           if (c < 0xA0)
1972             goto invalid_code;
1973           code |= c & 0x7F;
1974           break;
1975
1976         case 1:
1977           code = c;
1978           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1979                                      ? charset_ascii : charset_eight_bit);
1980           break;
1981
1982         default:
1983           abort ();
1984         }
1985       c = DECODE_CHAR (charset, code);
1986       if (c < 0)
1987         goto invalid_code;
1988     }
1989   *nbytes = src - src_base;
1990   *nchars = consumed_chars;
1991   if (id)
1992     *id = charset->id;
1993   return c;
1994
1995  no_more_source:
1996   return -2;
1997
1998  invalid_code:
1999   return -1;
2000 }
2001
2002
2003 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2004    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
2005    else return 0.  */
2006
2007 static int
2008 detect_coding_emacs_mule (coding, detect_info)
2009      struct coding_system *coding;
2010      struct coding_detection_info *detect_info;
2011 {
2012   const unsigned char *src = coding->source, *src_base;
2013   const unsigned char *src_end = coding->source + coding->src_bytes;
2014   int multibytep = coding->src_multibyte;
2015   int consumed_chars = 0;
2016   int c;
2017   int found = 0;
2018
2019   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
2020   /* A coding system of this category is always ASCII compatible.  */
2021   src += coding->head_ascii;
2022
2023   while (1)
2024     {
2025       src_base = src;
2026       ONE_MORE_BYTE (c);
2027       if (c < 0)
2028         continue;
2029       if (c == 0x80)
2030         {
2031           /* Perhaps the start of composite character.  We simple skip
2032              it because analyzing it is too heavy for detecting.  But,
2033              at least, we check that the composite character
2034              constitutes of more than 4 bytes.  */
2035           const unsigned char *src_base;
2036
2037         repeat:
2038           src_base = src;
2039           do
2040             {
2041               ONE_MORE_BYTE (c);
2042             }
2043           while (c >= 0xA0);
2044
2045           if (src - src_base <= 4)
2046             break;
2047           found = CATEGORY_MASK_EMACS_MULE;
2048           if (c == 0x80)
2049             goto repeat;
2050         }
2051
2052       if (c < 0x80)
2053         {
2054           if (c < 0x20
2055               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2056             break;
2057         }
2058       else
2059         {
2060           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2061
2062           while (more_bytes > 0)
2063             {
2064               ONE_MORE_BYTE (c);
2065               if (c < 0xA0)
2066                 {
2067                   src--;        /* Unread the last byte.  */
2068                   break;
2069                 }
2070               more_bytes--;
2071             }
2072           if (more_bytes != 0)
2073             break;
2074           found = CATEGORY_MASK_EMACS_MULE;
2075         }
2076     }
2077   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2078   return 0;
2079
2080  no_more_source:
2081   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2082     {
2083       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2084       return 0;
2085     }
2086   detect_info->found |= found;
2087   return 1;
2088 }
2089
2090
2091 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2092
/* Decode one component character of an Emacs 20/21 style composition
   sequence at SRC by calling emacs_mule_char.  On success store the
   character in *BUF, advance BUF, move SRC past the bytes read and add
   the consumed characters to CONSUMED_CHARS.  Do nothing if SRC has
   reached SRC_END or emacs_mule_char reports a truncated sequence
   (-2).  On any other negative result jump to `invalid_code'.

   The expansion uses `coding', `src', `src_end', `consumed_chars' and
   the label `invalid_code' from the surrounding function.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                         \
  do {                                                                  \
    if (src != src_end)                                                 \
      {                                                                 \
        int len_bytes, len_chars;                                       \
        int ch = emacs_mule_char (coding, src,                          \
                                  &len_bytes, &len_chars, NULL);        \
                                                                        \
        if (ch >= 0)                                                    \
          {                                                             \
            *buf++ = ch;                                                \
            src += len_bytes;                                           \
            consumed_chars += len_chars;                                \
          }                                                             \
        else if (ch != -2)                                              \
          goto invalid_code;                                            \
      }                                                                 \
  } while (0)
2120
2121
/* Decode a composition rule of an Emacs 20 style composition sequence
   at SRC.  The rule is one byte; after subtracting 0xA0 it must lie in
   0..80, and it is split into a quotient and a remainder modulo 9,
   which are combined with COMPOSITION_ENCODE_RULE.  Store the result
   in *BUF and increment BUF.  Jump to `invalid_code' if no byte is
   left at SRC or the byte is out of range.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)              \
  do {                                                          \
    int rule_byte;                                              \
    int ref_g, ref_n;                                           \
                                                                \
    if (! (src < src_end))                                      \
      goto invalid_code;                                        \
    ONE_MORE_BYTE_NO_CHECK (rule_byte);                         \
    rule_byte -= 0xA0;                                          \
    if (! (rule_byte >= 0 && rule_byte < 81))                   \
      goto invalid_code;                                        \
                                                                \
    ref_g = rule_byte / 9;                                      \
    ref_n = rule_byte % 9;                                      \
    *buf++ = COMPOSITION_ENCODE_RULE (ref_g, ref_n);            \
  } while (0)
2141
2142
/* Decode a composition rule of an Emacs 21 style composition sequence
   at SRC.  The rule takes two bytes, each holding a value plus 0x20;
   both values must lie in 0..80 and are combined with
   COMPOSITION_ENCODE_RULE.  Store the result in *BUF and increment
   BUF.  Jump to `invalid_code' if fewer than two bytes are left at SRC
   or either value is out of range.  */

#define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)              \
  do {                                                          \
    int ref_g, ref_n;                                           \
                                                                \
    if (src_end - src < 2)                                      \
      goto invalid_code;                                        \
    ONE_MORE_BYTE_NO_CHECK (ref_g);                             \
    ref_g -= 0x20;                                              \
    ONE_MORE_BYTE_NO_CHECK (ref_n);                             \
    ref_n -= 0x20;                                              \
    if (! (ref_g >= 0 && ref_g < 81                             \
           && ref_n >= 0 && ref_n < 81))                        \
      goto invalid_code;                                        \
    *buf++ = COMPOSITION_ENCODE_RULE (ref_g, ref_n);            \
  } while (0)
2163
2164
/* Decode an Emacs 21 style composition.  On entry C holds the METHOD
   byte; the BYTES and CHARS bytes are read from SRC.  Jump to
   `invalid_code' if either byte is negative or if BYTES - 0xA0 is less
   than 3.  Composition data is started in CHARBUF with
   ADD_COMPOSITION_DATA.  For every method except COMPOSITION_RELATIVE
   the components are then decoded until CONSUMED_CHARS reaches
   CONSUMED_CHARS_BASE + BYTES; odd-numbered components are rules
   unless the method is COMPOSITION_WITH_ALTCHARS.  Finally the number
   of decoded components is subtracted from the first element written
   at CHARBUF_BASE (presumably the annotation length stored by
   ADD_COMPOSITION_DATA -- confirm).

   NOTE(review): the test after the `while' loop repeats the loop
   condition, so it looks unreachable; the component macros leave the
   loop only through `goto'.
   NOTE(review): DECODE_EMACS_MULE_COMPOSITION_CHAR does nothing once
   SRC reaches SRC_END, so with COMPOSITION_WITH_ALTCHARS this loop
   seems unable to make progress on truncated input -- confirm.  */
2165 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
2166   do {                                                                  \
2167     /* Emacs 21 style format.  The first three bytes at SRC are         \
2168        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
2169        the byte length of this composition information, CHARS is the    \
2170        number of characters composed by this composition.  */           \
2171     enum composition_method method = c - 0xF2;                          \
2172     int *charbuf_base = charbuf;                                        \
2173     int consumed_chars_limit;                                           \
2174     int nbytes, nchars;                                                 \
2175                                                                         \
2176     ONE_MORE_BYTE (c);                                                  \
2177     if (c < 0)                                                          \
2178       goto invalid_code;                                                \
2179     nbytes = c - 0xA0;                                                  \
2180     if (nbytes < 3)                                                     \
2181       goto invalid_code;                                                \
2182     ONE_MORE_BYTE (c);                                                  \
2183     if (c < 0)                                                          \
2184       goto invalid_code;                                                \
2185     nchars = c - 0xA0;                                                  \
2186     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2187     consumed_chars_limit = consumed_chars_base + nbytes;                \
2188     if (method != COMPOSITION_RELATIVE)                                 \
2189       {                                                                 \
2190         int i = 0;                                                      \
2191         while (consumed_chars < consumed_chars_limit)                   \
2192           {                                                             \
2193             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
2194               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
2195             else                                                        \
2196               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
2197             i++;                                                        \
2198           }                                                             \
2199         if (consumed_chars < consumed_chars_limit)                      \
2200           goto invalid_code;                                            \
2201         charbuf_base[0] -= i;                                           \
2202       }                                                                 \
2203   } while (0)
2204
2205
/* Decode an Emacs 20 style relative composition.  SRC is rewound to
   SRC_BASE and the leading 0x80 byte is skipped.  While the byte at
   SRC is 0xA0 or above, up to MAX_COMPOSITION_COMPONENTS component
   characters are decoded into a local array.  Fewer than two
   iterations is an error (jump to `invalid_code').  Otherwise the
   composition data is started in CHARBUF with ADD_COMPOSITION_DATA and
   the components are copied after it.

   NOTE(review): the loop reads *SRC without first comparing SRC with
   SRC_END -- confirm the callers guarantee a readable byte there.  */
2206 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
2207   do {                                                                  \
2208     /* Emacs 20 style format for relative composition.  */              \
2209     /* Store multibyte form of characters to be composed.  */           \
2210     enum composition_method method = COMPOSITION_RELATIVE;              \
2211     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
2212     int *buf = components;                                              \
2213     int i, j;                                                           \
2214                                                                         \
2215     src = src_base;                                                     \
2216     ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
2217     for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++)    \
2218       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
2219     if (i < 2)                                                          \
2220       goto invalid_code;                                                \
2221     ADD_COMPOSITION_DATA (charbuf, i, method);                          \
2222     for (j = 0; j < i; j++)                                             \
2223       *charbuf++ = components[j];                                       \
2224   } while (0)
2225
2226
/* Decode an Emacs 20 style rule-base composition.  One component
   character is decoded first; then, while the byte at SRC is 0xA0 or
   above, (rule, character) pairs are decoded, up to
   MAX_COMPOSITION_COMPONENTS characters in total.  The input is
   invalid (jump to `invalid_code') if no pair was decoded or if the
   number of stored elements is even.  If CHARBUF lacks room, jump to
   `no_more_source'.  Otherwise the composition data is started with
   ADD_COMPOSITION_DATA, the characters and rules are copied after it,
   their count is subtracted from the first element at CHARBUF_BASE,
   and the characters alone (every second element) are appended again.

   NOTE(review): *SRC is read without first comparing SRC with
   SRC_END -- confirm the callers guarantee a readable byte there.  */
2227 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
2228   do {                                                          \
2229     /* Emacs 20 style format for rule-base composition.  */     \
2230     /* Store multibyte form of characters to be composed.  */   \
2231     enum composition_method method = COMPOSITION_WITH_RULE;     \
2232     int *charbuf_base = charbuf;                                \
2233     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
2234     int *buf = components;                                      \
2235     int i, j;                                                   \
2236                                                                 \
2237     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
2238     for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2239       {                                                         \
2240         if (*src < 0xA0)                                        \
2241           break;                                                \
2242         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
2243         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
2244       }                                                         \
2245     if (i <= 1 || (buf - components) % 2 == 0)                  \
2246       goto invalid_code;                                        \
2247     if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
2248       goto no_more_source;                                      \
2249     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
2250     i = i * 2 - 1;                                              \
2251     for (j = 0; j < i; j++)                                     \
2252       *charbuf++ = components[j];                               \
2253     charbuf_base[0] -= i;                                       \
2254     for (j = 0; j < i; j += 2)                                  \
2255       *charbuf++ = components[j];                               \
2256   } while (0)
2257
2258
/* Decode the emacs-mule text of CODING's source, starting at the
   bytes not yet consumed, and append the decoded characters to
   CODING->charbuf.  Composition sequences (introduced by 0x80) are
   turned into composition data by the DECODE_EMACS_MULE_* macros, and
   a charset annotation is added with ADD_CHARSET_DATA whenever a run
   of characters of one non-ASCII charset ends.  A byte that does not
   start a valid sequence is stored by itself (as an eight-bit
   character if it is not ASCII) and counted in CODING->errors.

   On return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used describe the state after the last completely
   decoded sequence.  */
2259 static void
2260 decode_coding_emacs_mule (coding)
2261      struct coding_system *coding;
2262 {
2263   const unsigned char *src = coding->source + coding->consumed;
2264   const unsigned char *src_end = coding->source + coding->src_bytes;
2265   const unsigned char *src_base;
2266   int *charbuf = coding->charbuf + coding->charbuf_used;
  /* Keep MAX_ANNOTATION_LENGTH elements in reserve; presumably this is
     what lets the annotations below be added without a further space
     check.  */
2267   int *charbuf_end
2268     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2269   int consumed_chars = 0, consumed_chars_base;
2270   int multibytep = coding->src_multibyte;
2271   Lisp_Object attrs, charset_list;
  /* CHAR_OFFSET counts the characters produced so far; LAST_OFFSET and
     LAST_ID describe where the current charset run began and which
     charset it belongs to.  */
2272   int char_offset = coding->produced_char;
2273   int last_offset = char_offset;
2274   int last_id = charset_ascii;
2275   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte read ahead after a CR, or -1 if there is none.  */
2276   int byte_after_cr = -1;
2277
2278   CODING_GET_INFO (coding, attrs, charset_list);
2279
2280   while (1)
2281     {
2282       int c;
2283
      /* Remember where this sequence starts, so that an incomplete or
         invalid sequence can be rewound.  */
2284       src_base = src;
2285       consumed_chars_base = consumed_chars;
2286
2287       if (charbuf >= charbuf_end)
2288         break;
2289
2290       if (byte_after_cr >= 0)
2291         c = byte_after_cr, byte_after_cr = -1;
2292       else
2293         ONE_MORE_BYTE (c);
2294       if (c < 0)
2295         {
          /* A negative value from ONE_MORE_BYTE is stored negated.  */
2296           *charbuf++ = -c;
2297           char_offset++;
2298         }
2299       else if (c < 0x80)
2300         {
          /* ASCII.  With DOS end-of-line, read the byte after a CR
             ahead before storing the CR.  */
2301           if (eol_crlf && c == '\r')
2302             ONE_MORE_BYTE (byte_after_cr);
2303           *charbuf++ = c;
2304           char_offset++;
2305         }
2306       else if (c == 0x80)
2307         {
          /* Composition sequence; the next byte selects its format.  */
2308           ONE_MORE_BYTE (c);
2309           if (c < 0)
2310             goto invalid_code;
2311           if (c - 0xF2 >= COMPOSITION_RELATIVE
2312               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2313             DECODE_EMACS_MULE_21_COMPOSITION (c);
2314           else if (c < 0xC0)
2315             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2316           else if (c == 0xFF)
2317             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2318           else
2319             goto invalid_code;
2320         }
2321       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2322         {
2323           int nbytes, nchars;
2324           int id;
2325
          /* Leading code of a multibyte sequence: rewind and let
             emacs_mule_char decode the whole sequence.  */
2326           src = src_base;
2327           consumed_chars = consumed_chars_base;
2328           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2329           if (c < 0)
2330             {
              /* -2 means the source ended inside the sequence.  */
2331               if (c == -2)
2332                 break;
2333               goto invalid_code;
2334             }
2335           if (last_id != id)
2336             {
              /* The charset changed: close the previous run, unless
                 it was ASCII, and start a new one here.  */
2337               if (last_id != charset_ascii)
2338                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2339               last_id = id;
2340               last_offset = char_offset;
2341             }
2342           *charbuf++ = c;
2343           src += nbytes;
2344           consumed_chars += nchars;
2345           char_offset++;
2346         }
2347       else
2348         goto invalid_code;
2349       continue;
2350
2351     invalid_code:
      /* Rewind to the start of the sequence and store its first byte
         by itself, counting an error.  */
2352       src = src_base;
2353       consumed_chars = consumed_chars_base;
2354       ONE_MORE_BYTE (c);
2355       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2356       char_offset++;
2357       coding->errors++;
2358     }
2359
2360  no_more_source:
  /* Close a charset run that is still open.  */
2361   if (last_id != charset_ascii)
2362     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2363   coding->consumed_char += consumed_chars_base;
2364   coding->consumed = src_base - coding->source;
2365   coding->charbuf_used = charbuf - coding->charbuf;
2366 }
2367
2368
/* Store in CODES (an array of at least two bytes) the leading-code
   sequence for the emacs-mule charset id ID.

   An id below 0xA0 is its own single leading code; CODES[1] is then
   set to 0, which callers use as "there is no second leading byte".
   Any larger id needs two bytes: a prefix selected by the id's range
   (0x9A for 0xA0..0xDF, 0x9B for 0xE0..0xEF, 0x9C for 0xF0..0xF4,
   0x9D for everything above) followed by the id itself.

   ID is evaluated exactly once.  The expansion is a single statement
   with no trailing semicolon, so the macro is safe as the unbraced
   body of an `if' that has an `else'.  */
#define EMACS_MULE_LEADING_CODES(id, codes)             \
  do {                                                  \
    int leading_id_ = (id);                             \
                                                        \
    if (leading_id_ < 0xA0)                             \
      {                                                 \
        (codes)[0] = leading_id_;                       \
        (codes)[1] = 0;                                 \
      }                                                 \
    else                                                \
      {                                                 \
        (codes)[0] = (leading_id_ < 0xE0 ? 0x9A         \
                      : leading_id_ < 0xF0 ? 0x9B       \
                      : leading_id_ < 0xF5 ? 0x9C       \
                      : 0x9D);                          \
        (codes)[1] = leading_id_;                       \
      }                                                 \
  } while (0)
2382
2383
2384 static int
2385 encode_coding_emacs_mule (coding)
2386      struct coding_system *coding;
2387 {
2388   int multibytep = coding->dst_multibyte;
2389   int *charbuf = coding->charbuf;
2390   int *charbuf_end = charbuf + coding->charbuf_used;
2391   unsigned char *dst = coding->destination + coding->produced;
2392   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2393   int safe_room = 8;
2394   int produced_chars = 0;
2395   Lisp_Object attrs, charset_list;
2396   int c;
2397   int preferred_charset_id = -1;
2398
2399   CODING_GET_INFO (coding, attrs, charset_list);
2400   if (! EQ (charset_list, Vemacs_mule_charset_list))
2401     {
2402       CODING_ATTR_CHARSET_LIST (attrs)
2403         = charset_list = Vemacs_mule_charset_list;
2404     }
2405
2406   while (charbuf < charbuf_end)
2407     {
2408       ASSURE_DESTINATION (safe_room);
2409       c = *charbuf++;
2410
2411       if (c < 0)
2412         {
2413           /* Handle an annotation.  */
2414           switch (*charbuf)
2415             {
2416             case CODING_ANNOTATE_COMPOSITION_MASK:
2417               /* Not yet implemented.  */
2418               break;
2419             case CODING_ANNOTATE_CHARSET_MASK:
2420               preferred_charset_id = charbuf[3];
2421               if (preferred_charset_id >= 0
2422                   && NILP (Fmemq (make_number (preferred_charset_id),
2423                                   charset_list)))
2424                 preferred_charset_id = -1;
2425               break;
2426             default:
2427               abort ();
2428             }
2429           charbuf += -c - 1;
2430           continue;
2431         }
2432
2433       if (ASCII_CHAR_P (c))
2434         EMIT_ONE_ASCII_BYTE (c);
2435       else if (CHAR_BYTE8_P (c))
2436         {
2437           c = CHAR_TO_BYTE8 (c);
2438           EMIT_ONE_BYTE (c);
2439         }
2440       else
2441         {
2442           struct charset *charset;
2443           unsigned code;
2444           int dimension;
2445           int emacs_mule_id;
2446           unsigned char leading_codes[2];
2447
2448           if (preferred_charset_id >= 0)
2449             {
2450               charset = CHARSET_FROM_ID (preferred_charset_id);
2451               if (! CHAR_CHARSET_P (c, charset))
2452                 charset = char_charset (c, charset_list, NULL);
2453             }
2454           else
2455             charset = char_charset (c, charset_list, &code);
2456           if (! charset)
2457             {
2458               c = coding->default_char;
2459               if (ASCII_CHAR_P (c))
2460                 {
2461                   EMIT_ONE_ASCII_BYTE (c);
2462                   continue;
2463                 }
2464               charset = char_charset (c, charset_list, &code);
2465             }
2466           dimension = CHARSET_DIMENSION (charset);
2467           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2468           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2469           EMIT_ONE_BYTE (leading_codes[0]);
2470           if (leading_codes[1])
2471             EMIT_ONE_BYTE (leading_codes[1]);
2472           if (dimension == 1)
2473             EMIT_ONE_BYTE (code | 0x80);
2474           else
2475             {
2476               code |= 0x8080;
2477               EMIT_ONE_BYTE (code >> 8);
2478               EMIT_ONE_BYTE (code & 0xFF);
2479             }
2480         }
2481     }
2482   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2483   coding->produced_char += produced_chars;
2484   coding->produced = dst - coding->destination;
2485   return 0;
2486 }
2487
2488 \f
2489 /*** 7. ISO2022 handlers ***/
2490
2491 /* The following note describes the coding system ISO2022 briefly.
2492    Since the intention of this note is to help understand the
2493    functions in this file, some parts are NOT ACCURATE or are OVERLY
2494    SIMPLIFIED.  For thorough understanding, please refer to the
2495    original document of ISO2022.  This is equivalent to the standard
2496    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2497
2498    ISO2022 provides many mechanisms to encode several character sets
2499    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2500    is encoded using bytes less than 128.  This may make the encoded
2501    text a little bit longer, but the text passes more easily through
2502    several types of gateway, some of which strip off the MSB (Most
2503    Significant Bit).
2504
2505    There are two kinds of character sets: control character sets and
2506    graphic character sets.  The former contain control characters such
2507    as `newline' and `escape' to provide control functions (control
2508    functions are also provided by escape sequences).  The latter
2509    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2510    two control character sets and many graphic character sets.
2511
2512    Graphic character sets are classified into one of the following
2513    four classes, according to the number of bytes (DIMENSION) and
2514    number of characters in one dimension (CHARS) of the set:
2515    - DIMENSION1_CHARS94
2516    - DIMENSION1_CHARS96
2517    - DIMENSION2_CHARS94
2518    - DIMENSION2_CHARS96
2519
2520    In addition, each character set is assigned an identification tag,
2521    unique for each set, called the "final character" (denoted as <F>
2522    hereafter).  The <F> of each character set is decided by ECMA(*)
2523    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2524    (0x30..0x3F are for private use only).
2525
2526    Note (*): ECMA = European Computer Manufacturers Association
2527
2528    Here are examples of graphic character sets [NAME(<F>)]:
2529         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2530         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2531         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2532         o DIMENSION2_CHARS96 -- none for the moment
2533
2534    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2535         C0 [0x00..0x1F] -- control character plane 0
2536         GL [0x20..0x7F] -- graphic character plane 0
2537         C1 [0x80..0x9F] -- control character plane 1
2538         GR [0xA0..0xFF] -- graphic character plane 1
2539
2540    A control character set is directly designated and invoked to C0 or
2541    C1 by an escape sequence.  The most common case is that:
2542    - ISO646's  control character set is designated/invoked to C0, and
2543    - ISO6429's control character set is designated/invoked to C1,
2544    and usually these designations/invocations are omitted in encoded
2545    text.  In a 7-bit environment, only C0 can be used, and a control
2546    character for C1 is encoded by an appropriate escape sequence to
2547    fit into the environment.  All control characters for C1 are
2548    defined to have corresponding escape sequences.
2549
2550    A graphic character set is at first designated to one of four
2551    graphic registers (G0 through G3), then these graphic registers are
2552    invoked to GL or GR.  These designations and invocations can be
2553    done independently.  The most common case is that G0 is invoked to
2554    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2555    these invocations and designations are omitted in encoded text.
2556    In a 7-bit environment, only GL can be used.
2557
2558    When a graphic character set of CHARS94 is invoked to GL, codes
2559    0x20 and 0x7F of the GL area work as control characters SPACE and
2560    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2561    be used.
2562
2563    There are two ways of invocation: locking-shift and single-shift.
2564    With locking-shift, the invocation lasts until the next different
2565    invocation, whereas with single-shift, the invocation affects the
2566    following character only and doesn't affect the locking-shift
2567    state.  Invocations are done by the following control characters or
2568    escape sequences:
2569
2570    ----------------------------------------------------------------------
2571    abbrev  function                  cntrl escape seq   description
2572    ----------------------------------------------------------------------
2573    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2574    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2575    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2576    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2577    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2578    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2579    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2580    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2581    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2582    ----------------------------------------------------------------------
2583    (*) These are not used by any known coding system.
2584
2585    Control characters for these functions are defined by macros
2586    ISO_CODE_XXX in `coding.h'.
2587
2588    Designations are done by the following escape sequences:
2589    ----------------------------------------------------------------------
2590    escape sequence      description
2591    ----------------------------------------------------------------------
2592    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2593    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2594    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2595    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2596    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2597    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2598    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2599    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2600    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2601    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2602    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2603    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2604    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2605    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2606    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2607    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2608    ----------------------------------------------------------------------
2609
2610    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2611    of dimension 1, chars 94, and final character <F>, etc...
2612
2613    Note (*): Although these designations are not allowed in ISO2022,
2614    Emacs accepts them on decoding, and produces them on encoding
2615    CHARS96 character sets in a coding system which is characterized as
2616    7-bit environment, non-locking-shift, and non-single-shift.
2617
2618    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2619    '(' must be omitted.  We refer to this as "short-form" hereafter.
2620
2621    Now you may notice that there are a lot of ways of encoding the
2622    same multilingual text in ISO2022.  Actually, there exist many
2623    coding systems such as Compound Text (used in X11's inter-client
2624    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2625    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2626    localized platforms), and all of these are variants of ISO2022.
2627
2628    In addition to the above, Emacs handles two more kinds of escape
2629    sequences: ISO6429's direction specification and Emacs' private
2630    sequence for specifying character composition.
2631
2632    ISO6429's direction specification takes the following form:
2633         o CSI ']'      -- end of the current direction
2634         o CSI '0' ']'  -- end of the current direction
2635         o CSI '1' ']'  -- start of left-to-right text
2636         o CSI '2' ']'  -- start of right-to-left text
2637    The control character CSI (0x9B: control sequence introducer) is
2638    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2639
2640    Character composition specification takes the following form:
2641         o ESC '0' -- start relative composition
2642         o ESC '1' -- end composition
2643         o ESC '2' -- start rule-base composition (*)
2644         o ESC '3' -- start relative composition with alternate chars  (**)
2645         o ESC '4' -- start rule-base composition with alternate chars  (**)
2646   Since these are not standard escape sequences of any ISO standard,
2647   the use of them with these meanings is restricted to Emacs only.
2648
2649   (*) This form is used only in Emacs 20.7 and older versions,
2650   but newer versions can safely decode it.
2651   (**) This form is used only in Emacs 21.1 and newer versions,
2652   and older versions can't decode it.
2653
2654   Here's a list of example usages of these composition escape
2655   sequences (categorized by `enum composition_method').
2656
2657   COMPOSITION_RELATIVE:
2658         ESC 0 CHAR [ CHAR ] ESC 1
2659   COMPOSITION_WITH_RULE:
2660         ESC 2 CHAR [ RULE CHAR ] ESC 1
2661   COMPOSITION_WITH_ALTCHARS:
2662         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2663   COMPOSITION_WITH_RULE_ALTCHARS:
2664         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2665
/* Table of ISO code classes with 256 entries -- presumably one entry
   per possible byte value; confirm against the code that fills it in
   elsewhere in this file.  */
2666 enum iso_code_class_type iso_code_class[256];
2667
/* Nonzero if the charset whose id is ID can be handled by CODING,
   i.e. ID is within CODING's safe-charset table and its entry is not
   marked unsupported.

   Supported entries hold a small non-negative value, while
   unsupported ones are filled with 255 by setup_iso_safe_charsets.
   The high bit is tested instead of comparing with `>= 0' so that the
   result does not depend on whether plain `char' is signed: with an
   unsigned `char' the old comparison was always true.

   ID is evaluated twice and must not be negative.  */
#define SAFE_CHARSET_P(coding, id)                      \
  ((id) <= (coding)->max_charset_id                     \
   && ((coding)->safe_charsets[id] & 0x80) == 0)
2671
2672
/* Nonzero unless CODING_ISO_INITIAL for register 1 of the coding
   system stored in coding_categories[CATEGORY] is negative --
   presumably meaning that some charset is initially designated to G1,
   so that a shift-out makes sense (confirm against coding.h).  */
#define SHIFT_OUT_OK(category)                                          \
  (! (CODING_ISO_INITIAL (&coding_categories[category], 1) < 0))
2675
2676 static void
2677 setup_iso_safe_charsets (attrs)
2678      Lisp_Object attrs;
2679 {
2680   Lisp_Object charset_list, safe_charsets;
2681   Lisp_Object request;
2682   Lisp_Object reg_usage;
2683   Lisp_Object tail;
2684   int reg94, reg96;
2685   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2686   int max_charset_id;
2687
2688   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2689   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2690       && ! EQ (charset_list, Viso_2022_charset_list))
2691     {
2692       CODING_ATTR_CHARSET_LIST (attrs)
2693         = charset_list = Viso_2022_charset_list;
2694       ASET (attrs, coding_attr_safe_charsets, Qnil);
2695     }
2696
2697   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2698     return;
2699
2700   max_charset_id = 0;
2701   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2702     {
2703       int id = XINT (XCAR (tail));
2704       if (max_charset_id < id)
2705         max_charset_id = id;
2706     }
2707
2708   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2709                                 make_number (255));
2710   request = AREF (attrs, coding_attr_iso_request);
2711   reg_usage = AREF (attrs, coding_attr_iso_usage);
2712   reg94 = XINT (XCAR (reg_usage));
2713   reg96 = XINT (XCDR (reg_usage));
2714
2715   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2716     {
2717       Lisp_Object id;
2718       Lisp_Object reg;
2719       struct charset *charset;
2720
2721       id = XCAR (tail);
2722       charset = CHARSET_FROM_ID (XINT (id));
2723       reg = Fcdr (Fassq (id, request));
2724       if (! NILP (reg))
2725         SSET (safe_charsets, XINT (id), XINT (reg));
2726       else if (charset->iso_chars_96)
2727         {
2728           if (reg96 < 4)
2729             SSET (safe_charsets, XINT (id), reg96);
2730         }
2731       else
2732         {
2733           if (reg94 < 4)
2734             SSET (safe_charsets, XINT (id), reg94);
2735         }
2736     }
2737   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2738 }
2739
2740
2741 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2742    Check if a text is encoded in one of ISO-2022 based codig systems.
2743    If it is, return 1, else return 0.  */
2744
2745 static int
2746 detect_coding_iso_2022 (coding, detect_info)
2747      struct coding_system *coding;
2748      struct coding_detection_info *detect_info;
2749 {
2750   const unsigned char *src = coding->source, *src_base = src;
2751   const unsigned char *src_end = coding->source + coding->src_bytes;
2752   int multibytep = coding->src_multibyte;
2753   int single_shifting = 0;
2754   int id;
2755   int c, c1;
2756   int consumed_chars = 0;
2757   int i;
2758   int rejected = 0;
2759   int found = 0;
2760
2761   detect_info->checked |= CATEGORY_MASK_ISO;
2762
2763   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2764     {
2765       struct coding_system *this = &(coding_categories[i]);
2766       Lisp_Object attrs, val;
2767
2768       if (this->id < 0)
2769         continue;
2770       attrs = CODING_ID_ATTRS (this->id);
2771       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2772           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2773         setup_iso_safe_charsets (attrs);
2774       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2775       this->max_charset_id = SCHARS (val) - 1;
2776       this->safe_charsets = (char *) SDATA (val);
2777     }
2778
2779   /* A coding system of this category is always ASCII compatible.  */
2780   src += coding->head_ascii;
2781
2782   while (rejected != CATEGORY_MASK_ISO)
2783     {
2784       src_base = src;
2785       ONE_MORE_BYTE (c);
2786       switch (c)
2787         {
2788         case ISO_CODE_ESC:
2789           if (inhibit_iso_escape_detection)
2790             break;
2791           single_shifting = 0;
2792           ONE_MORE_BYTE (c);
2793           if (c >= '(' && c <= '/')
2794             {
2795               /* Designation sequence for a charset of dimension 1.  */
2796               ONE_MORE_BYTE (c1);
2797               if (c1 < ' ' || c1 >= 0x80
2798                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2799                 /* Invalid designation sequence.  Just ignore.  */
2800                 break;
2801             }
2802           else if (c == '$')
2803             {
2804               /* Designation sequence for a charset of dimension 2.  */
2805               ONE_MORE_BYTE (c);
2806               if (c >= '@' && c <= 'B')
2807                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2808                 id = iso_charset_table[1][0][c];
2809               else if (c >= '(' && c <= '/')
2810                 {
2811                   ONE_MORE_BYTE (c1);
2812                   if (c1 < ' ' || c1 >= 0x80
2813                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2814                     /* Invalid designation sequence.  Just ignore.  */
2815                     break;
2816                 }
2817               else
2818                 /* Invalid designation sequence.  Just ignore it.  */
2819                 break;
2820             }
2821           else if (c == 'N' || c == 'O')
2822             {
2823               /* ESC <Fe> for SS2 or SS3.  */
2824               single_shifting = 1;
2825               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2826               break;
2827             }
2828           else if (c >= '0' && c <= '4')
2829             {
2830               /* ESC <Fp> for start/end composition.  */
2831               found |= CATEGORY_MASK_ISO;
2832               break;
2833             }
2834           else
2835             {
2836               /* Invalid escape sequence.  Just ignore it.  */
2837               break;
2838             }
2839
2840           /* We found a valid designation sequence for CHARSET.  */
2841           rejected |= CATEGORY_MASK_ISO_8BIT;
2842           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2843                               id))
2844             found |= CATEGORY_MASK_ISO_7;
2845           else
2846             rejected |= CATEGORY_MASK_ISO_7;
2847           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2848                               id))
2849             found |= CATEGORY_MASK_ISO_7_TIGHT;
2850           else
2851             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2852           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2853                               id))
2854             found |= CATEGORY_MASK_ISO_7_ELSE;
2855           else
2856             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2857           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2858                               id))
2859             found |= CATEGORY_MASK_ISO_8_ELSE;
2860           else
2861             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2862           break;
2863
2864         case ISO_CODE_SO:
2865         case ISO_CODE_SI:
2866           /* Locking shift out/in.  */
2867           if (inhibit_iso_escape_detection)
2868             break;
2869           single_shifting = 0;
2870           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2871           break;
2872
2873         case ISO_CODE_CSI:
2874           /* Control sequence introducer.  */
2875           single_shifting = 0;
2876           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2877           found |= CATEGORY_MASK_ISO_8_ELSE;
2878           goto check_extra_latin;
2879
2880         case ISO_CODE_SS2:
2881         case ISO_CODE_SS3:
2882           /* Single shift.   */
2883           if (inhibit_iso_escape_detection)
2884             break;
2885           single_shifting = 0;
2886           rejected |= CATEGORY_MASK_ISO_7BIT;
2887           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2888               & CODING_ISO_FLAG_SINGLE_SHIFT)
2889             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2890           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2891               & CODING_ISO_FLAG_SINGLE_SHIFT)
2892             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2893           if (single_shifting)
2894             break;
2895           goto check_extra_latin;
2896
2897         default:
2898           if (c < 0)
2899             continue;
2900           if (c < 0x80)
2901             {
2902               single_shifting = 0;
2903               break;
2904             }
2905           if (c >= 0xA0)
2906             {
2907               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2908               found |= CATEGORY_MASK_ISO_8_1;
2909               /* Check the length of succeeding codes of the range
2910                  0xA0..0FF.  If the byte length is even, we include
2911                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2912                  only when we are not single shifting.  */
2913               if (! single_shifting
2914                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2915                 {
2916                   int i = 1;
2917                   while (src < src_end)
2918                     {
2919                       ONE_MORE_BYTE (c);
2920                       if (c < 0xA0)
2921                         break;
2922                       i++;
2923                     }
2924
2925                   if (i & 1 && src < src_end)
2926                     rejected |= CATEGORY_MASK_ISO_8_2;
2927                   else
2928                     found |= CATEGORY_MASK_ISO_8_2;
2929                 }
2930               break;
2931             }
2932         check_extra_latin:
2933           single_shifting = 0;
2934           if (! VECTORP (Vlatin_extra_code_table)
2935               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2936             {
2937               rejected = CATEGORY_MASK_ISO;
2938               break;
2939             }
2940           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2941               & CODING_ISO_FLAG_LATIN_EXTRA)
2942             found |= CATEGORY_MASK_ISO_8_1;
2943           else
2944             rejected |= CATEGORY_MASK_ISO_8_1;
2945           rejected |= CATEGORY_MASK_ISO_8_2;
2946         }
2947     }
2948   detect_info->rejected |= CATEGORY_MASK_ISO;
2949   return 0;
2950
2951  no_more_source:
2952   detect_info->rejected |= rejected;
2953   detect_info->found |= (found & ~rejected);
2954   return 1;
2955 }
2956
2957
/* Record in CODING the designation to graphic register REG of the
   charset identified by dimension DIM, size class CHARS_96 and final
   byte FINAL.

   If FINAL is out of range, or names no charset, or names one that
   CODING cannot handle, REG is marked as invalidly designated (-2).
   Otherwise the charset id is stored for REG, after replacing
   JISX0201-Roman by ASCII and JISX0208.1978 by JISX0208 when the
   corresponding flags of CODING ask for that.

   CHARS_96 must be an lvalue: it is set to -1 to tell the caller that
   the escape sequence should be kept.  This happens for an invalid
   designation and for an ASCII designation that follows an invalid
   one on the same register.

   Relies on a variable named `coding' in the expansion context.  */
#define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
  do {                                                                  \
    int id = -1, prev;                                                  \
                                                                        \
    if ((final) >= '0' && (final) < 128)                                \
      id = ISO_CHARSET_TABLE (dim, chars_96, final);                    \
    if (id < 0 || ! SAFE_CHARSET_P (coding, id))                        \
      {                                                                 \
        /* Unknown or unsupported charset.  */                          \
        CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
        chars_96 = -1;                                                  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        prev = CODING_ISO_DESIGNATION (coding, reg);                    \
        if (id == charset_jisx0201_roman)                               \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)  \
              id = charset_ascii;                                       \
          }                                                             \
        else if (id == charset_jisx0208_1978)                           \
          {                                                             \
            if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS) \
              id = charset_jisx0208;                                    \
          }                                                             \
        CODING_ISO_DESIGNATION (coding, reg) = id;                      \
        /* An ASCII designation that replaces an invalid one must be    \
           kept in the output as well.  */                              \
        if (id == charset_ascii && prev == -2)                          \
          chars_96 = -1;                                                \
      }                                                                 \
  } while (0)
2990
2991
/* If a composition is being decoded, give it up: reset the
   composition state and append the characters collected so far in
   `components' to the character buffer as ordinary characters.

   For the relative and alternate-character methods every stored
   component is a character.  For the other (rule based) methods only
   every second component, starting with the first, is copied.

   Jumps to `no_more_source' without changing anything if the
   character buffer cannot hold component_idx more elements.

   Relies on these names in the expansion context: composition_state,
   method, components, component_idx, charbuf, charbuf_end,
   char_offset, and the label no_more_source.  */
#define MAYBE_FINISH_COMPOSITION()                                      \
  do {                                                                  \
    if (composition_state != COMPOSING_NO)                              \
      {                                                                 \
        int k;                                                          \
                                                                        \
        /* Make sure the stored components fit before touching any      \
           state.  */                                                   \
        if (charbuf_end < charbuf + component_idx)                      \
          goto no_more_source;                                          \
        composition_state = COMPOSING_NO;                               \
        if (method != COMPOSITION_RELATIVE                              \
            && method != COMPOSITION_WITH_ALTCHARS)                     \
          {                                                             \
            k = 0;                                                      \
            while (k < component_idx)                                   \
              {                                                         \
                *charbuf++ = components[k];                             \
                k += 2;                                                 \
              }                                                         \
            char_offset += (component_idx / 2) + 1;                     \
          }                                                             \
        else                                                            \
          {                                                             \
            k = 0;                                                      \
            while (k < component_idx)                                   \
              *charbuf++ = components[k++];                             \
            char_offset += component_idx;                               \
          }                                                             \
      }                                                                 \
  } while (0)
3016
3017
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1

   C1 is the byte that followed ESC.

   An ESC 0 seen while the state is COMPOSING_COMPONENT_RULE ends the
   list of alternate components: the number of components read so far
   is saved in component_len and the characters themselves follow.

   Otherwise a new composition starts.  Any composition in progress is
   finished first, and then the rest of the source is searched for the
   terminating ESC 1.  If it is not there, or if the character buffer
   may be too small for a full composition, control jumps to
   `no_more_source' before any composition state is changed; in the
   former case CODING_RESULT_INSUFFICIENT_SRC is recorded too.

   Relies on these names in the expansion context: coding, src,
   src_end, charbuf, charbuf_end, method, composition_state,
   component_idx, component_len, and the label no_more_source.  */

#define DECODE_COMPOSITION_START(c1)                                    \
  do {                                                                  \
    if ((c1) == '0'                                                     \
        && composition_state == COMPOSING_COMPONENT_RULE)               \
      {                                                                 \
        component_len = component_idx;                                  \
        composition_state = COMPOSING_CHAR;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        const unsigned char *p;                                         \
                                                                        \
        MAYBE_FINISH_COMPOSITION ();                                    \
        if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
          goto no_more_source;                                          \
        for (p = src; p < src_end - 1; p++)                             \
          if (*p == ISO_CODE_ESC && p[1] == '1')                        \
            break;                                                      \
        /* Use `>=', not `==': when fewer than two bytes are left the   \
           loop above does not run at all and P may already equal       \
           SRC_END, which must not be taken as a match.  */             \
        if (p >= src_end - 1)                                           \
          {                                                             \
            /* The current composition doesn't end in the current       \
               source.  */                                              \
            record_conversion_result                                    \
              (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
            goto no_more_source;                                        \
          }                                                             \
                                                                        \
        /* This is surely the start of a composition.  */               \
        method = ((c1) == '0' ? COMPOSITION_RELATIVE                    \
                  : (c1) == '2' ? COMPOSITION_WITH_RULE                 \
                  : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS             \
                  : COMPOSITION_WITH_RULE_ALTCHARS);                    \
        composition_state = ((c1) <= '2' ? COMPOSING_CHAR               \
                             : COMPOSING_COMPONENT_CHAR);               \
        component_idx = component_len = 0;                              \
      }                                                                 \
  } while (0)
3062
3063
/* Handle composition end sequence ESC 1.  Append a composition
   annotation to CHARBUF, followed by the characters collected in
   COMPONENTS, and leave the composing state.  For methods other than
   COMPOSITION_RELATIVE the component part is copied into the
   annotation, and the annotation's first element is rewritten to the
   negated number of elements the annotation then occupies.  */

#define DECODE_COMPOSITION_END()                                        \
  do {                                                                  \
    int *cmp_header = charbuf;                                          \
    int cmp_nchars, cmp_from, cmp_step, k;                              \
                                                                        \
    if (component_len > 0)                                              \
      cmp_nchars = component_idx - component_len;                       \
    else if (method == COMPOSITION_RELATIVE)                            \
      cmp_nchars = component_idx;                                       \
    else                                                                \
      cmp_nchars = (component_idx + 1) / 2;                             \
                                                                        \
    ADD_COMPOSITION_DATA (charbuf, cmp_nchars, method);                 \
    if (method != COMPOSITION_RELATIVE)                                 \
      {                                                                 \
        /* Without an explicit component part, everything collected     \
           so far is copied into the annotation.  */                    \
        int cmp_ncomponents                                             \
          = component_len == 0 ? component_idx : component_len;         \
                                                                        \
        for (k = 0; k < cmp_ncomponents; k++)                           \
          *charbuf++ = components[k];                                   \
        *cmp_header = cmp_header - charbuf;                             \
      }                                                                 \
                                                                        \
    /* With COMPOSITION_WITH_RULE, characters and rules alternate in    \
       COMPONENTS, so only every second element is a character.  */     \
    if (method == COMPOSITION_WITH_RULE)                                \
      cmp_from = 0, cmp_step = 2;                                       \
    else                                                                \
      cmp_from = component_len, cmp_step = 1;                           \
    for (k = cmp_from; k < component_idx; k += cmp_step)                \
      {                                                                 \
        *charbuf++ = components[k];                                     \
        char_offset++;                                                  \
      }                                                                 \
    coding->annotated = 1;                                              \
    composition_state = COMPOSING_NO;                                   \
  } while (0)
3094
3095
/* Decode a composition rule from the byte C1 (and maybe one more byte
   read from SRC into C2) and replace C1 with the rule encoded by
   COMPOSITION_ENCODE_RULE.  C1 is set to 0 when the byte is in neither
   the old nor the new format range.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    (c1) -= 32;                                                         \
    if ((c1) >= 93)                                                     \
      (c1) = 0;                                                         \
    else if ((c1) >= 81)        /* new format (after ver.21) */         \
      {                                                                 \
        ONE_MORE_BYTE (c2);                                             \
        (c1) = COMPOSITION_ENCODE_RULE ((c1) - 81, c2 - 32);            \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int global_ref = (c1) / 9;                                      \
        int new_ref = (c1) % 9;                                         \
                                                                        \
        /* Value 4 is remapped to 10 on either side.  */                \
        if (global_ref == 4)                                            \
          global_ref = 10;                                              \
        if (new_ref == 4)                                               \
          new_ref = 10;                                                 \
        (c1) = COMPOSITION_ENCODE_RULE (global_ref, new_ref);           \
      }                                                                 \
  } while (0)
3119
3120
3121 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3122
3123 static void
3124 decode_coding_iso_2022 (coding)
3125      struct coding_system *coding;
3126 {
3127   const unsigned char *src = coding->source + coding->consumed;
3128   const unsigned char *src_end = coding->source + coding->src_bytes;
3129   const unsigned char *src_base;
3130   int *charbuf = coding->charbuf + coding->charbuf_used;
3131   int *charbuf_end
3132     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
3133   int consumed_chars = 0, consumed_chars_base;
3134   int multibytep = coding->src_multibyte;
3135   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3136   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3137   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3138   int charset_id_2, charset_id_3;
3139   struct charset *charset;
3140   int c;
3141   /* For handling composition sequence.  */
3142 #define COMPOSING_NO                    0
3143 #define COMPOSING_CHAR                  1
3144 #define COMPOSING_RULE                  2
3145 #define COMPOSING_COMPONENT_CHAR        3
3146 #define COMPOSING_COMPONENT_RULE        4
3147
3148   int composition_state = COMPOSING_NO;
3149   enum composition_method method;
3150   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3151   int component_idx;
3152   int component_len;
3153   Lisp_Object attrs, charset_list;
3154   int char_offset = coding->produced_char;
3155   int last_offset = char_offset;
3156   int last_id = charset_ascii;
3157   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3158   int byte_after_cr = -1;
3159
3160   CODING_GET_INFO (coding, attrs, charset_list);
3161   setup_iso_safe_charsets (attrs);
3162   /* Charset list may have been changed.  */
3163   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3164   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3165
3166   while (1)
3167     {
3168       int c1, c2;
3169
3170       src_base = src;
3171       consumed_chars_base = consumed_chars;
3172
3173       if (charbuf >= charbuf_end)
3174         break;
3175
3176       if (byte_after_cr >= 0)
3177         c1 = byte_after_cr, byte_after_cr = -1;
3178       else
3179         ONE_MORE_BYTE (c1);
3180       if (c1 < 0)
3181         goto invalid_code;
3182
3183       /* We produce at most one character.  */
3184       switch (iso_code_class [c1])
3185         {
3186         case ISO_0x20_or_0x7F:
3187           if (composition_state != COMPOSING_NO)
3188             {
3189               if (composition_state == COMPOSING_RULE
3190                   || composition_state == COMPOSING_COMPONENT_RULE)
3191                 {
3192                   DECODE_COMPOSITION_RULE (c1);
3193                   components[component_idx++] = c1;
3194                   composition_state--;
3195                   continue;
3196                 }
3197             }
3198           if (charset_id_0 < 0
3199               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3200             /* This is SPACE or DEL.  */
3201             charset = CHARSET_FROM_ID (charset_ascii);
3202           else
3203             charset = CHARSET_FROM_ID (charset_id_0);
3204           break;
3205
3206         case ISO_graphic_plane_0:
3207           if (composition_state != COMPOSING_NO)
3208             {
3209               if (composition_state == COMPOSING_RULE
3210                   || composition_state == COMPOSING_COMPONENT_RULE)
3211                 {
3212                   DECODE_COMPOSITION_RULE (c1);
3213                   components[component_idx++] = c1;
3214                   composition_state--;
3215                   continue;
3216                 }
3217             }
3218           if (charset_id_0 < 0)
3219             charset = CHARSET_FROM_ID (charset_ascii);
3220           else
3221             charset = CHARSET_FROM_ID (charset_id_0);
3222           break;
3223
3224         case ISO_0xA0_or_0xFF:
3225           if (charset_id_1 < 0
3226               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3227               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3228             goto invalid_code;
3229           /* This is a graphic character, we fall down ... */
3230
3231         case ISO_graphic_plane_1:
3232           if (charset_id_1 < 0)
3233             goto invalid_code;
3234           charset = CHARSET_FROM_ID (charset_id_1);
3235           break;
3236
3237         case ISO_control_0:
3238           if (eol_crlf && c1 == '\r')
3239             ONE_MORE_BYTE (byte_after_cr);
3240           MAYBE_FINISH_COMPOSITION ();
3241           charset = CHARSET_FROM_ID (charset_ascii);
3242           break;
3243
3244         case ISO_control_1:
3245           MAYBE_FINISH_COMPOSITION ();
3246           goto invalid_code;
3247
3248         case ISO_shift_out:
3249           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3250               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3251             goto invalid_code;
3252           CODING_ISO_INVOCATION (coding, 0) = 1;
3253           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3254           continue;
3255
3256         case ISO_shift_in:
3257           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3258             goto invalid_code;
3259           CODING_ISO_INVOCATION (coding, 0) = 0;
3260           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3261           continue;
3262
3263         case ISO_single_shift_2_7:
3264         case ISO_single_shift_2:
3265           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3266             goto invalid_code;
3267           /* SS2 is handled as an escape sequence of ESC 'N' */
3268           c1 = 'N';
3269           goto label_escape_sequence;
3270
3271         case ISO_single_shift_3:
3272           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3273             goto invalid_code;
3274           /* SS2 is handled as an escape sequence of ESC 'O' */
3275           c1 = 'O';
3276           goto label_escape_sequence;
3277
3278         case ISO_control_sequence_introducer:
3279           /* CSI is handled as an escape sequence of ESC '[' ...  */
3280           c1 = '[';
3281           goto label_escape_sequence;
3282
3283         case ISO_escape:
3284           ONE_MORE_BYTE (c1);
3285         label_escape_sequence:
3286           /* Escape sequences handled here are invocation,
3287              designation, direction specification, and character
3288              composition specification.  */
3289           switch (c1)
3290             {
3291             case '&':           /* revision of following character set */
3292               ONE_MORE_BYTE (c1);
3293               if (!(c1 >= '@' && c1 <= '~'))
3294                 goto invalid_code;
3295               ONE_MORE_BYTE (c1);
3296               if (c1 != ISO_CODE_ESC)
3297                 goto invalid_code;
3298               ONE_MORE_BYTE (c1);
3299               goto label_escape_sequence;
3300
3301             case '$':           /* designation of 2-byte character set */
3302               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3303                 goto invalid_code;
3304               {
3305                 int reg, chars96;
3306
3307                 ONE_MORE_BYTE (c1);
3308                 if (c1 >= '@' && c1 <= 'B')
3309                   {     /* designation of JISX0208.1978, GB2312.1980,
3310                            or JISX0208.1980 */
3311                     reg = 0, chars96 = 0;
3312                   }
3313                 else if (c1 >= 0x28 && c1 <= 0x2B)
3314                   { /* designation of DIMENSION2_CHARS94 character set */
3315                     reg = c1 - 0x28, chars96 = 0;
3316                     ONE_MORE_BYTE (c1);
3317                   }
3318                 else if (c1 >= 0x2C && c1 <= 0x2F)
3319                   { /* designation of DIMENSION2_CHARS96 character set */
3320                     reg = c1 - 0x2C, chars96 = 1;
3321                     ONE_MORE_BYTE (c1);
3322                   }
3323                 else
3324                   goto invalid_code;
3325                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3326                 /* We must update these variables now.  */
3327                 if (reg == 0)
3328                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3329                 else if (reg == 1)
3330                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3331                 if (chars96 < 0)
3332                   goto invalid_code;
3333               }
3334               continue;
3335
3336             case 'n':           /* invocation of locking-shift-2 */
3337               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3338                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3339                 goto invalid_code;
3340               CODING_ISO_INVOCATION (coding, 0) = 2;
3341               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3342               continue;
3343
3344             case 'o':           /* invocation of locking-shift-3 */
3345               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3346                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3347                 goto invalid_code;
3348               CODING_ISO_INVOCATION (coding, 0) = 3;
3349               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3350               continue;
3351
3352             case 'N':           /* invocation of single-shift-2 */
3353               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3354                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3355                 goto invalid_code;
3356               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3357               if (charset_id_2 < 0)
3358                 charset = CHARSET_FROM_ID (charset_ascii);
3359               else
3360                 charset = CHARSET_FROM_ID (charset_id_2);
3361               ONE_MORE_BYTE (c1);
3362               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3363                 goto invalid_code;
3364               break;
3365
3366             case 'O':           /* invocation of single-shift-3 */
3367               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3368                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3369                 goto invalid_code;
3370               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3371               if (charset_id_3 < 0)
3372                 charset = CHARSET_FROM_ID (charset_ascii);
3373               else
3374                 charset = CHARSET_FROM_ID (charset_id_3);
3375               ONE_MORE_BYTE (c1);
3376               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3377                 goto invalid_code;
3378               break;
3379
3380             case '0': case '2': case '3': case '4': /* start composition */
3381               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3382                 goto invalid_code;
3383               DECODE_COMPOSITION_START (c1);
3384               continue;
3385
3386             case '1':           /* end composition */
3387               if (composition_state == COMPOSING_NO)
3388                 goto invalid_code;
3389               DECODE_COMPOSITION_END ();
3390               continue;
3391
3392             case '[':           /* specification of direction */
3393               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3394                 goto invalid_code;
3395               /* For the moment, nested direction is not supported.
3396                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3397                  left-to-right, and nozero means right-to-left.  */
3398               ONE_MORE_BYTE (c1);
3399               switch (c1)
3400                 {
3401                 case ']':       /* end of the current direction */
3402                   coding->mode &= ~CODING_MODE_DIRECTION;
3403
3404                 case '0':       /* end of the current direction */
3405                 case '1':       /* start of left-to-right direction */
3406                   ONE_MORE_BYTE (c1);
3407                   if (c1 == ']')
3408                     coding->mode &= ~CODING_MODE_DIRECTION;
3409                   else
3410                     goto invalid_code;
3411                   break;
3412
3413                 case '2':       /* start of right-to-left direction */
3414                   ONE_MORE_BYTE (c1);
3415                   if (c1 == ']')
3416                     coding->mode |= CODING_MODE_DIRECTION;
3417                   else
3418                     goto invalid_code;
3419                   break;
3420
3421                 default:
3422                   goto invalid_code;
3423                 }
3424               continue;
3425
3426             case '%':
3427               ONE_MORE_BYTE (c1);
3428               if (c1 == '/')
3429                 {
3430                   /* CTEXT extended segment:
3431                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3432                      We keep these bytes as is for the moment.
3433                      They may be decoded by post-read-conversion.  */
3434                   int dim, M, L;
3435                   int size;
3436
3437                   ONE_MORE_BYTE (dim);
3438                   ONE_MORE_BYTE (M);
3439                   ONE_MORE_BYTE (L);
3440                   size = ((M - 128) * 128) + (L - 128);
3441                   if (charbuf + 8 + size > charbuf_end)
3442                     goto break_loop;
3443                   *charbuf++ = ISO_CODE_ESC;
3444                   *charbuf++ = '%';
3445                   *charbuf++ = '/';
3446                   *charbuf++ = dim;
3447                   *charbuf++ = BYTE8_TO_CHAR (M);
3448                   *charbuf++ = BYTE8_TO_CHAR (L);
3449                   while (size-- > 0)
3450                     {
3451                       ONE_MORE_BYTE (c1);
3452                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3453                     }
3454                 }
3455               else if (c1 == 'G')
3456                 {
3457                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3458                      ESC % G --UTF-8-BYTES-- ESC % @
3459                      We keep these bytes as is for the moment.
3460                      They may be decoded by post-read-conversion.  */
3461                   int *p = charbuf;
3462
3463                   if (p + 6 > charbuf_end)
3464                     goto break_loop;
3465                   *p++ = ISO_CODE_ESC;
3466                   *p++ = '%';
3467                   *p++ = 'G';
3468                   while (p < charbuf_end)
3469                     {
3470                       ONE_MORE_BYTE (c1);
3471                       if (c1 == ISO_CODE_ESC
3472                           && src + 1 < src_end
3473                           && src[0] == '%'
3474                           && src[1] == '@')
3475                         {
3476                           src += 2;
3477                           break;
3478                         }
3479                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3480                     }
3481                   if (p + 3 > charbuf_end)
3482                     goto break_loop;
3483                   *p++ = ISO_CODE_ESC;
3484                   *p++ = '%';
3485                   *p++ = '@';
3486                   charbuf = p;
3487                 }
3488               else
3489                 goto invalid_code;
3490               continue;
3491               break;
3492
3493             default:
3494               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3495                 goto invalid_code;
3496               {
3497                 int reg, chars96;
3498
3499                 if (c1 >= 0x28 && c1 <= 0x2B)
3500                   { /* designation of DIMENSION1_CHARS94 character set */
3501                     reg = c1 - 0x28, chars96 = 0;
3502                     ONE_MORE_BYTE (c1);
3503                   }
3504                 else if (c1 >= 0x2C && c1 <= 0x2F)
3505                   { /* designation of DIMENSION1_CHARS96 character set */
3506                     reg = c1 - 0x2C, chars96 = 1;
3507                     ONE_MORE_BYTE (c1);
3508                   }
3509                 else
3510                   goto invalid_code;
3511                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3512                 /* We must update these variables now.  */
3513                 if (reg == 0)
3514                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3515                 else if (reg == 1)
3516                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3517                 if (chars96 < 0)
3518                   goto invalid_code;
3519               }
3520               continue;
3521             }
3522         }
3523
3524       if (charset->id != charset_ascii
3525           && last_id != charset->id)
3526         {
3527           if (last_id != charset_ascii)
3528             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3529           last_id = charset->id;
3530           last_offset = char_offset;
3531         }
3532
3533       /* Now we know CHARSET and 1st position code C1 of a character.
3534          Produce a decoded character while getting 2nd position code
3535          C2 if necessary.  */
3536       c1 &= 0x7F;
3537       if (CHARSET_DIMENSION (charset) > 1)
3538         {
3539           ONE_MORE_BYTE (c2);
3540           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3541             /* C2 is not in a valid range.  */
3542             goto invalid_code;
3543           c1 = (c1 << 8) | (c2 & 0x7F);
3544           if (CHARSET_DIMENSION (charset) > 2)
3545             {
3546               ONE_MORE_BYTE (c2);
3547               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3548                 /* C2 is not in a valid range.  */
3549                 goto invalid_code;
3550               c1 = (c1 << 8) | (c2 & 0x7F);
3551             }
3552         }
3553
3554       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3555       if (c < 0)
3556         {
3557           MAYBE_FINISH_COMPOSITION ();
3558           for (; src_base < src; src_base++, char_offset++)
3559             {
3560               if (ASCII_BYTE_P (*src_base))
3561                 *charbuf++ = *src_base;
3562               else
3563                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3564             }
3565         }
3566       else if (composition_state == COMPOSING_NO)
3567         {
3568           *charbuf++ = c;
3569           char_offset++;
3570         }
3571       else
3572         {
3573           components[component_idx++] = c;
3574           if (method == COMPOSITION_WITH_RULE
3575               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3576                   && composition_state == COMPOSING_COMPONENT_CHAR))
3577             composition_state++;
3578         }
3579       continue;
3580
3581     invalid_code:
3582       MAYBE_FINISH_COMPOSITION ();
3583       src = src_base;
3584       consumed_chars = consumed_chars_base;
3585       ONE_MORE_BYTE (c);
3586       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3587       char_offset++;
3588       coding->errors++;
3589       continue;
3590
3591     break_loop:
3592       break;
3593     }
3594
3595  no_more_source:
3596   if (last_id != charset_ascii)
3597     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3598   coding->consumed_char += consumed_chars_base;
3599   coding->consumed = src_base - coding->source;
3600   coding->charbuf_used = charbuf - coding->charbuf;
3601 }
3602
3603
3604 /* ISO2022 encoding stuff.  */
3605
3606 /*
3607    It is not enough to say just "ISO2022" on encoding, we have to
3608    specify more details.  In Emacs, each coding system of ISO2022
3609    variant has the following specifications:
3610         1. Initial designation to G0 thru G3.
3611         2. Allows short-form designation?
3612         3. ASCII should be designated to G0 before control characters?
3613         4. ASCII should be designated to G0 at end of line?
3614         5. 7-bit environment or 8-bit environment?
3615         6. Use locking-shift?
3616         7. Use Single-shift?
3617    And the following two are only for Japanese:
3618         8. Use ASCII in place of JIS0201-1976-Roman?
3619         9. Use JISX0208-1983 in place of JISX0208-1978?
3620    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3621    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3622    details.
3623 */
3624
/* Emit the escape sequence that designates CHARSET to graphic
   register REG, and record the designation in CODING.  When the
   coding system has CODING_ISO_FLAG_REVISION set and CHARSET has a
   non-negative revision number, the sequence is preceded by
   ESC '&' <'@' + revision>.  For a multi-dimension 94-chars CHARSET
   designated to register 0 with <final-char> '@', 'A', or 'B', the
   intermediate character is left out (short form) unless
   CODING_ISO_FLAG_LONG_FORM is set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
    int is_chars96 = CHARSET_ISO_CHARS_96 (charset);                    \
    /* Intermediate character selecting REG, by charset size.  */       \
    int intermediate = (is_chars96 ? ",-./" : "()*+")[reg];             \
    int revision                                                        \
      = ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)         \
         ? CHARSET_ISO_REVISION (charset) : -1);                        \
                                                                        \
    if (revision >= 0)                                                  \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
        EMIT_ONE_BYTE ('@' + revision);                                 \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
    if (CHARSET_DIMENSION (charset) == 1)                               \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE (intermediate);                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
        /* Only the short form omits the intermediate character.  */    \
        if (is_chars96                                                  \
            || (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM)  \
            || reg != 0                                                 \
            || final_char < '@' || final_char > 'B')                    \
          {                                                             \
            EMIT_ONE_ASCII_BYTE (intermediate);                         \
          }                                                             \
      }                                                                 \
    EMIT_ONE_ASCII_BYTE (final_char);                                   \
                                                                        \
    CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
  } while (0)
3672
3673
3674 /* The following two macros produce codes (control character or escape
3675    sequence) for ISO2022 single-shift functions (single-shift-2 and
3676    single-shift-3).  */
3677
/* Emit single-shift-2: ESC 'N' when CODING_ISO_FLAG_SEVEN_BITS is set,
   the single byte ISO_CODE_SS2 otherwise.  Then mark CODING as being
   in the single-shifting state.  */
#define ENCODE_SINGLE_SHIFT_2                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS2);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3686
3687
/* Emit single-shift-3: ESC 'O' when CODING_ISO_FLAG_SEVEN_BITS is set,
   the single byte ISO_CODE_SS3 otherwise.  Then mark CODING as being
   in the single-shifting state.  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do {                                                                  \
    if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))     \
      {                                                                 \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                       \
      }                                                                 \
    CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
  } while (0)
3696
3697
3698 /* The following four macros produce codes (control character or
3699    escape sequence) for ISO2022 locking-shift functions (shift-in,
3700    shift-out, locking-shift-2, and locking-shift-3).  */
3701
/* Emit ISO_CODE_SI and record that graphic register 0 is now the one
   invoked to graphic plane 0.  */
#define ENCODE_SHIFT_IN                                                 \
  do                                                                    \
    {                                                                   \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                                \
      CODING_ISO_INVOCATION (coding, 0) = 0;                            \
    }                                                                   \
  while (0)
3707
3708
/* Emit ISO_CODE_SO and record that graphic register 1 is now the one
   invoked to graphic plane 0.  */
#define ENCODE_SHIFT_OUT                                                \
  do                                                                    \
    {                                                                   \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                                \
      CODING_ISO_INVOCATION (coding, 0) = 1;                            \
    }                                                                   \
  while (0)
3714
3715
/* Emit ESC 'n' (locking-shift-2) and record that graphic register 2
   is now the one invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                                          \
  do                                                                    \
    {                                                                   \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');                         \
      CODING_ISO_INVOCATION (coding, 0) = 2;                            \
    }                                                                   \
  while (0)
3721
3722
/* Emit ESC 'o' (locking-shift-3) and record that graphic register 3
   is now the one invoked to graphic plane 0.  The final byte must be
   'o': ESC 'n' is locking-shift-2, and the decoder in this file
   treats ESC 'o' as locking-shift-3.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
3728
3729
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   CHARSET must be an lvalue: when CODING_ISO_FLAG_USE_ROMAN is set and
   CHARSET is ASCII, it is replaced by the JISX0201-Roman charset.  C1
   may be any integer expression; it is parenthesized at each use.
   The expansion refers to the variables `coding', `dst' and
   `produced_chars' of the enclosing function.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
        && id == charset_ascii)                                         \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A pending single shift is consumed by this character.  */    \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                            \
        else                                                            \
          EMIT_ONE_BYTE ((c1) | 0x80);                                  \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      {                                                                 \
        /* CHARSET is invoked to graphic plane 0.  */                   \
        EMIT_ONE_ASCII_BYTE ((c1) & 0x7F);                              \
        break;                                                          \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
      {                                                                 \
        /* CHARSET is invoked to graphic plane 1.  */                   \
        EMIT_ONE_BYTE ((c1) | 0x80);                                    \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst,        \
                                           &produced_chars);            \
  } while (1)
3772
3773
/* Emit a DIMENSION2 (two-byte) character of CHARSET whose
   position-codes are C1 and C2, first producing any designation and
   invocation codes that are still missing.

   CHARSET must be an lvalue: with CODING_ISO_FLAG_USE_OLDJIS set, a
   JISX0208 charset is replaced by JISX0208-1978.  The expansion
   refers to the variables `coding', `dst' and `produced_chars' of the
   enclosing function.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        /* A pending single shift is consumed by this character.  */    \
        if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)) \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        else                                                            \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
        break;                                                          \
      }                                                                 \
                                                                        \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))                   \
      {                                                                 \
        /* CHARSET is invoked to graphic plane 0.  */                   \
        EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
        break;                                                          \
      }                                                                 \
                                                                        \
    if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))                   \
      {                                                                 \
        /* CHARSET is invoked to graphic plane 1.  */                   \
        EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
        break;                                                          \
      }                                                                 \
                                                                        \
    /* CHARSET is not invoked to any graphic plane yet.  Produce the    \
       invocation (and, if needed, the designation) now, then go        \
       round the loop again to emit the character itself.  */           \
    dst = encode_invocation_designation (charset, coding, dst,          \
                                         &produced_chars);              \
  } while (1)
3816
3817
/* Encode character C of CHARSET.  The code point is obtained with
   ENCODE_CHAR and handed to the DIMENSION1 macro when
   CHARSET_DIMENSION is 1; otherwise it is split into its high part
   (CODE >> 8) and low byte for the DIMENSION2 macro.  CHARSET must be
   an lvalue because those macros may assign to it.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do {                                                                     \
    int code = ENCODE_CHAR ((charset),(c));                                \
                                                                           \
    if (CHARSET_DIMENSION (charset) != 1)                                  \
      ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
    else                                                                   \
      ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
  } while (0)
3827
3828
3829 /* Produce designation and invocation codes at a place pointed by DST
3830    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3831    Return new DST.  */
3832
3833 unsigned char *
3834 encode_invocation_designation (charset, coding, dst, p_nchars)
3835      struct charset *charset;
3836      struct coding_system *coding;
3837      unsigned char *dst;
3838      int *p_nchars;
3839 {
3840   int multibytep = coding->dst_multibyte;
3841   int produced_chars = *p_nchars;
3842   int reg;                      /* graphic register number */
3843   int id = CHARSET_ID (charset);
3844
3845   /* At first, check designations.  */
3846   for (reg = 0; reg < 4; reg++)
3847     if (id == CODING_ISO_DESIGNATION (coding, reg))
3848       break;
3849
3850   if (reg >= 4)
3851     {
3852       /* CHARSET is not yet designated to any graphic registers.  */
3853       /* At first check the requested designation.  */
3854       reg = CODING_ISO_REQUEST (coding, id);
3855       if (reg < 0)
3856         /* Since CHARSET requests no special designation, designate it
3857            to graphic register 0.  */
3858         reg = 0;
3859
3860       ENCODE_DESIGNATION (charset, reg, coding);
3861     }
3862
3863   if (CODING_ISO_INVOCATION (coding, 0) != reg
3864       && CODING_ISO_INVOCATION (coding, 1) != reg)
3865     {
3866       /* Since the graphic register REG is not invoked to any graphic
3867          planes, invoke it to graphic plane 0.  */
3868       switch (reg)
3869         {
3870         case 0:                 /* graphic register 0 */
3871           ENCODE_SHIFT_IN;
3872           break;
3873
3874         case 1:                 /* graphic register 1 */
3875           ENCODE_SHIFT_OUT;
3876           break;
3877
3878         case 2:                 /* graphic register 2 */
3879           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3880             ENCODE_SINGLE_SHIFT_2;
3881           else
3882             ENCODE_LOCKING_SHIFT_2;
3883           break;
3884
3885         case 3:                 /* graphic register 3 */
3886           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3887             ENCODE_SINGLE_SHIFT_3;
3888           else
3889             ENCODE_LOCKING_SHIFT_3;
3890           break;
3891         }
3892     }
3893
3894   *p_nchars = produced_chars;
3895   return dst;
3896 }
3897
3898 /* The following three macros produce codes for indicating direction
3899    of text.  */
/* Emit the control sequence introducer: ESC [ when the coding system
   is restricted to seven bits, the CSI control byte otherwise.
   CODING_ISO_FLAGS is a set of bit flags, so the seven-bit flag is
   tested with `&'; an equality test would fail whenever any other
   flag is set as well.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
3907
3908
/* Emit the control sequence CSI 2 ] that marks right-to-left text.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is invoked without an argument list.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
3914
3915
/* Emit the control sequence CSI 0 ] that marks left-to-right text.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is invoked without an argument list.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
3921
3922
/* Bring the graphic planes and registers back to their initial state:
   shift in if graphic plane 0 does not hold register 0, then
   re-designate every register whose current charset differs from its
   (non-negative) initial one.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do {                                                                  \
    struct charset *charset;                                            \
    int reg = 0;                                                        \
                                                                        \
    if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
      ENCODE_SHIFT_IN;                                                  \
                                                                        \
    while (reg < 4)                                                     \
      {                                                                 \
        if (CODING_ISO_INITIAL (coding, reg) >= 0                       \
            && (CODING_ISO_INITIAL (coding, reg)                        \
                != CODING_ISO_DESIGNATION (coding, reg)))               \
          {                                                             \
            charset                                                     \
              = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg));     \
            ENCODE_DESIGNATION (charset, reg, coding);                  \
          }                                                             \
        reg++;                                                          \
      }                                                                 \
  } while (0)
3941
3942
3943 /* Produce designation sequences of charsets in the line started from
3944    SRC to a place pointed by DST, and return updated DST.
3945
3946    If the current block ends before any end-of-line, we may fail to
3947    find all the necessary designations.  */
3948
3949 static unsigned char *
3950 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3951      struct coding_system *coding;
3952      int *charbuf, *charbuf_end;
3953      unsigned char *dst;
3954 {
3955   struct charset *charset;
3956   /* Table of charsets to be designated to each graphic register.  */
3957   int r[4];
3958   int c, found = 0, reg;
3959   int produced_chars = 0;
3960   int multibytep = coding->dst_multibyte;
3961   Lisp_Object attrs;
3962   Lisp_Object charset_list;
3963
3964   attrs = CODING_ID_ATTRS (coding->id);
3965   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3966   if (EQ (charset_list, Qiso_2022))
3967     charset_list = Viso_2022_charset_list;
3968
3969   for (reg = 0; reg < 4; reg++)
3970     r[reg] = -1;
3971
3972   while (found < 4)
3973     {
3974       int id;
3975
3976       c = *charbuf++;
3977       if (c == '\n')
3978         break;
3979       charset = char_charset (c, charset_list, NULL);
3980       id = CHARSET_ID (charset);
3981       reg = CODING_ISO_REQUEST (coding, id);
3982       if (reg >= 0 && r[reg] < 0)
3983         {
3984           found++;
3985           r[reg] = id;
3986         }
3987     }
3988
3989   if (found)
3990     {
3991       for (reg = 0; reg < 4; reg++)
3992         if (r[reg] >= 0
3993             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3994           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3995     }
3996
3997   return dst;
3998 }
3999
4000 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4001
4002 static int
4003 encode_coding_iso_2022 (coding)
4004      struct coding_system *coding;
4005 {
4006   int multibytep = coding->dst_multibyte;
4007   int *charbuf = coding->charbuf;
4008   int *charbuf_end = charbuf + coding->charbuf_used;
4009   unsigned char *dst = coding->destination + coding->produced;
4010   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4011   int safe_room = 16;
4012   int bol_designation
4013     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4014        && CODING_ISO_BOL (coding));
4015   int produced_chars = 0;
4016   Lisp_Object attrs, eol_type, charset_list;
4017   int ascii_compatible;
4018   int c;
4019   int preferred_charset_id = -1;
4020
4021   CODING_GET_INFO (coding, attrs, charset_list);
4022   eol_type = CODING_ID_EOL_TYPE (coding->id);
4023   if (VECTORP (eol_type))
4024     eol_type = Qunix;
4025
4026   setup_iso_safe_charsets (attrs);
4027   /* Charset list may have been changed.  */
4028   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4029   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
4030
4031   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4032
4033   while (charbuf < charbuf_end)
4034     {
4035       ASSURE_DESTINATION (safe_room);
4036
4037       if (bol_designation)
4038         {
4039           unsigned char *dst_prev = dst;
4040
4041           /* We have to produce designation sequences if any now.  */
4042           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4043           bol_designation = 0;
4044           /* We are sure that designation sequences are all ASCII bytes.  */
4045           produced_chars += dst - dst_prev;
4046         }
4047
4048       c = *charbuf++;
4049
4050       if (c < 0)
4051         {
4052           /* Handle an annotation.  */
4053           switch (*charbuf)
4054             {
4055             case CODING_ANNOTATE_COMPOSITION_MASK:
4056               /* Not yet implemented.  */
4057               break;
4058             case CODING_ANNOTATE_CHARSET_MASK:
4059               preferred_charset_id = charbuf[2];
4060               if (preferred_charset_id >= 0
4061                   && NILP (Fmemq (make_number (preferred_charset_id),
4062                                   charset_list)))
4063                 preferred_charset_id = -1;
4064               break;
4065             default:
4066               abort ();
4067             }
4068           charbuf += -c - 1;
4069           continue;
4070         }
4071
4072       /* Now encode the character C.  */
4073       if (c < 0x20 || c == 0x7F)
4074         {
4075           if (c == '\n'
4076               || (c == '\r' && EQ (eol_type, Qmac)))
4077             {
4078               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4079                 ENCODE_RESET_PLANE_AND_REGISTER ();
4080               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4081                 {
4082                   int i;
4083
4084                   for (i = 0; i < 4; i++)
4085                     CODING_ISO_DESIGNATION (coding, i)
4086                       = CODING_ISO_INITIAL (coding, i);
4087                 }
4088               bol_designation
4089                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4090             }
4091           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4092             ENCODE_RESET_PLANE_AND_REGISTER ();
4093           EMIT_ONE_ASCII_BYTE (c);
4094         }
4095       else if (ASCII_CHAR_P (c))
4096         {
4097           if (ascii_compatible)
4098             EMIT_ONE_ASCII_BYTE (c);
4099           else
4100             {
4101               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4102               ENCODE_ISO_CHARACTER (charset, c);
4103             }
4104         }
4105       else if (CHAR_BYTE8_P (c))
4106         {
4107           c = CHAR_TO_BYTE8 (c);
4108           EMIT_ONE_BYTE (c);
4109         }
4110       else
4111         {
4112           struct charset *charset;
4113
4114           if (preferred_charset_id >= 0)
4115             {
4116               charset = CHARSET_FROM_ID (preferred_charset_id);
4117               if (! CHAR_CHARSET_P (c, charset))
4118                 charset = char_charset (c, charset_list, NULL);
4119             }
4120           else
4121             charset = char_charset (c, charset_list, NULL);
4122           if (!charset)
4123             {
4124               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4125                 {
4126                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4127                   charset = CHARSET_FROM_ID (charset_ascii);
4128                 }
4129               else
4130                 {
4131                   c = coding->default_char;
4132                   charset = char_charset (c, charset_list, NULL);
4133                 }
4134             }
4135           ENCODE_ISO_CHARACTER (charset, c);
4136         }
4137     }
4138
4139   if (coding->mode & CODING_MODE_LAST_BLOCK
4140       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4141     {
4142       ASSURE_DESTINATION (safe_room);
4143       ENCODE_RESET_PLANE_AND_REGISTER ();
4144     }
4145   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4146   CODING_ISO_BOL (coding) = bol_designation;
4147   coding->produced_char += produced_chars;
4148   coding->produced = dst - coding->destination;
4149   return 0;
4150 }
4151
4152 \f
/*** 8. SJIS and BIG5 handlers ***/
4154
/* Although SJIS and BIG5 are not ISO coding systems, they are used
   quite widely.  So, for the moment, Emacs supports them in the bare
   C code.  But, in the future, they may be supported only by CCL.  */
4158
/* SJIS is a coding system encoding three character sets: ASCII, the
   right half of JISX0201-Kana, and JISX0208.  An ASCII character is
   encoded as is.  A character of charset katakana-jisx0201 is encoded
   as "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but its two position-codes are divided and
   shifted so that they fit in the ranges below.
4165
4166    --- CODE RANGE of SJIS ---
4167    (character set)      (range)
4168    ASCII                0x00 .. 0x7F
4169    KATAKANA-JISX0201    0xA0 .. 0xDF
4170    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4171             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4172    -------------------------------
4173
4174 */
4175
/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and is encoded in two bytes.
4179
4180    --- CODE RANGE of BIG5 ---
4181    (character set)      (range)
4182    ASCII                0x00 .. 0x7F
4183    Big5 (1st byte)      0xA1 .. 0xFE
4184         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4185    --------------------------
4186
4187   */
4188
4189 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4190    Check if a text is encoded in SJIS.  If it is, return
4191    CATEGORY_MASK_SJIS, else return 0.  */
4192
4193 static int
4194 detect_coding_sjis (coding, detect_info)
4195      struct coding_system *coding;
4196      struct coding_detection_info *detect_info;
4197 {
4198   const unsigned char *src = coding->source, *src_base;
4199   const unsigned char *src_end = coding->source + coding->src_bytes;
4200   int multibytep = coding->src_multibyte;
4201   int consumed_chars = 0;
4202   int found = 0;
4203   int c;
4204
4205   detect_info->checked |= CATEGORY_MASK_SJIS;
4206   /* A coding system of this category is always ASCII compatible.  */
4207   src += coding->head_ascii;
4208
4209   while (1)
4210     {
4211       src_base = src;
4212       ONE_MORE_BYTE (c);
4213       if (c < 0x80)
4214         continue;
4215       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4216         {
4217           ONE_MORE_BYTE (c);
4218           if (c < 0x40 || c == 0x7F || c > 0xFC)
4219             break;
4220           found = CATEGORY_MASK_SJIS;
4221         }
4222       else if (c >= 0xA0 && c < 0xE0)
4223         found = CATEGORY_MASK_SJIS;
4224       else
4225         break;
4226     }
4227   detect_info->rejected |= CATEGORY_MASK_SJIS;
4228   return 0;
4229
4230  no_more_source:
4231   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4232     {
4233       detect_info->rejected |= CATEGORY_MASK_SJIS;
4234       return 0;
4235     }
4236   detect_info->found |= found;
4237   return 1;
4238 }
4239
4240 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4241    Check if a text is encoded in BIG5.  If it is, return
4242    CATEGORY_MASK_BIG5, else return 0.  */
4243
4244 static int
4245 detect_coding_big5 (coding, detect_info)
4246      struct coding_system *coding;
4247      struct coding_detection_info *detect_info;
4248 {
4249   const unsigned char *src = coding->source, *src_base;
4250   const unsigned char *src_end = coding->source + coding->src_bytes;
4251   int multibytep = coding->src_multibyte;
4252   int consumed_chars = 0;
4253   int found = 0;
4254   int c;
4255
4256   detect_info->checked |= CATEGORY_MASK_BIG5;
4257   /* A coding system of this category is always ASCII compatible.  */
4258   src += coding->head_ascii;
4259
4260   while (1)
4261     {
4262       src_base = src;
4263       ONE_MORE_BYTE (c);
4264       if (c < 0x80)
4265         continue;
4266       if (c >= 0xA1)
4267         {
4268           ONE_MORE_BYTE (c);
4269           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4270             return 0;
4271           found = CATEGORY_MASK_BIG5;
4272         }
4273       else
4274         break;
4275     }
4276   detect_info->rejected |= CATEGORY_MASK_BIG5;
4277   return 0;
4278
4279  no_more_source:
4280   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4281     {
4282       detect_info->rejected |= CATEGORY_MASK_BIG5;
4283       return 0;
4284     }
4285   detect_info->found |= found;
4286   return 1;
4287 }
4288
4289 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4290    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4291
4292 static void
4293 decode_coding_sjis (coding)
4294      struct coding_system *coding;
4295 {
4296   const unsigned char *src = coding->source + coding->consumed;
4297   const unsigned char *src_end = coding->source + coding->src_bytes;
4298   const unsigned char *src_base;
4299   int *charbuf = coding->charbuf + coding->charbuf_used;
4300   int *charbuf_end
4301     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4302   int consumed_chars = 0, consumed_chars_base;
4303   int multibytep = coding->src_multibyte;
4304   struct charset *charset_roman, *charset_kanji, *charset_kana;
4305   struct charset *charset_kanji2;
4306   Lisp_Object attrs, charset_list, val;
4307   int char_offset = coding->produced_char;
4308   int last_offset = char_offset;
4309   int last_id = charset_ascii;
4310   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4311   int byte_after_cr = -1;
4312
4313   CODING_GET_INFO (coding, attrs, charset_list);
4314
4315   val = charset_list;
4316   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4317   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4318   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4319   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4320
4321   while (1)
4322     {
4323       int c, c1;
4324       struct charset *charset;
4325
4326       src_base = src;
4327       consumed_chars_base = consumed_chars;
4328
4329       if (charbuf >= charbuf_end)
4330         break;
4331
4332       if (byte_after_cr >= 0)
4333         c = byte_after_cr, byte_after_cr = -1;
4334       else
4335         ONE_MORE_BYTE (c);
4336       if (c < 0)
4337         goto invalid_code;
4338       if (c < 0x80)
4339         {
4340           if (eol_crlf && c == '\r')
4341             ONE_MORE_BYTE (byte_after_cr);
4342           charset = charset_roman;
4343         }
4344       else if (c == 0x80 || c == 0xA0)
4345         goto invalid_code;
4346       else if (c >= 0xA1 && c <= 0xDF)
4347         {
4348           /* SJIS -> JISX0201-Kana */
4349           c &= 0x7F;
4350           charset = charset_kana;
4351         }
4352       else if (c <= 0xEF)
4353         {
4354           /* SJIS -> JISX0208 */
4355           ONE_MORE_BYTE (c1);
4356           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4357             goto invalid_code;
4358           c = (c << 8) | c1;
4359           SJIS_TO_JIS (c);
4360           charset = charset_kanji;
4361         }
4362       else if (c <= 0xFC && charset_kanji2)
4363         {
4364           /* SJIS -> JISX0213-2 */
4365           ONE_MORE_BYTE (c1);
4366           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4367             goto invalid_code;
4368           c = (c << 8) | c1;
4369           SJIS_TO_JIS2 (c);
4370           charset = charset_kanji2;
4371         }
4372       else
4373         goto invalid_code;
4374       if (charset->id != charset_ascii
4375           && last_id != charset->id)
4376         {
4377           if (last_id != charset_ascii)
4378             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4379           last_id = charset->id;
4380           last_offset = char_offset;
4381         }
4382       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4383       *charbuf++ = c;
4384       char_offset++;
4385       continue;
4386
4387     invalid_code:
4388       src = src_base;
4389       consumed_chars = consumed_chars_base;
4390       ONE_MORE_BYTE (c);
4391       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4392       char_offset++;
4393       coding->errors++;
4394     }
4395
4396  no_more_source:
4397   if (last_id != charset_ascii)
4398     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4399   coding->consumed_char += consumed_chars_base;
4400   coding->consumed = src_base - coding->source;
4401   coding->charbuf_used = charbuf - coding->charbuf;
4402 }
4403
4404 static void
4405 decode_coding_big5 (coding)
4406      struct coding_system *coding;
4407 {
4408   const unsigned char *src = coding->source + coding->consumed;
4409   const unsigned char *src_end = coding->source + coding->src_bytes;
4410   const unsigned char *src_base;
4411   int *charbuf = coding->charbuf + coding->charbuf_used;
4412   int *charbuf_end
4413     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4414   int consumed_chars = 0, consumed_chars_base;
4415   int multibytep = coding->src_multibyte;
4416   struct charset *charset_roman, *charset_big5;
4417   Lisp_Object attrs, charset_list, val;
4418   int char_offset = coding->produced_char;
4419   int last_offset = char_offset;
4420   int last_id = charset_ascii;
4421   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4422   int byte_after_cr = -1;
4423
4424   CODING_GET_INFO (coding, attrs, charset_list);
4425   val = charset_list;
4426   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4427   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4428
4429   while (1)
4430     {
4431       int c, c1;
4432       struct charset *charset;
4433
4434       src_base = src;
4435       consumed_chars_base = consumed_chars;
4436
4437       if (charbuf >= charbuf_end)
4438         break;
4439
4440       if (byte_after_cr >= 0)
4441         c = byte_after_cr, byte_after_cr = -1;
4442       else
4443         ONE_MORE_BYTE (c);
4444
4445       if (c < 0)
4446         goto invalid_code;
4447       if (c < 0x80)
4448         {
4449           if (eol_crlf && c == '\r')
4450             ONE_MORE_BYTE (byte_after_cr);
4451           charset = charset_roman;
4452         }
4453       else
4454         {
4455           /* BIG5 -> Big5 */
4456           if (c < 0xA1 || c > 0xFE)
4457             goto invalid_code;
4458           ONE_MORE_BYTE (c1);
4459           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4460             goto invalid_code;
4461           c = c << 8 | c1;
4462           charset = charset_big5;
4463         }
4464       if (charset->id != charset_ascii
4465           && last_id != charset->id)
4466         {
4467           if (last_id != charset_ascii)
4468             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4469           last_id = charset->id;
4470           last_offset = char_offset;
4471         }
4472       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4473       *charbuf++ = c;
4474       char_offset++;
4475       continue;
4476
4477     invalid_code:
4478       src = src_base;
4479       consumed_chars = consumed_chars_base;
4480       ONE_MORE_BYTE (c);
4481       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4482       char_offset++;
4483       coding->errors++;
4484     }
4485
4486  no_more_source:
4487   if (last_id != charset_ascii)
4488     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4489   coding->consumed_char += consumed_chars_base;
4490   coding->consumed = src_base - coding->source;
4491   coding->charbuf_used = charbuf - coding->charbuf;
4492 }
4493
4494 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4495    Encode the characters of CODING->charbuf as Shift-JIS bytes and
4496    append them to CODING->destination.  The first three elements of
4497    the coding system's charset list are taken, in order, as the
4498    roman, kana and kanji charsets; an optional fourth element is a
4499    second kanji charset.  (There is no SJIS_P argument; BIG5 text is
4500    handled separately by encode_coding_big5.)  */
4501
4502 static int
4503 encode_coding_sjis (coding)
4504      struct coding_system *coding;
4505 {
4506   int multibytep = coding->dst_multibyte;
4507   int *charbuf = coding->charbuf;
4508   int *charbuf_end = charbuf + coding->charbuf_used;
4509   unsigned char *dst = coding->destination + coding->produced;
4510   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4511   int safe_room = 4;
4512   int produced_chars = 0;
4513   Lisp_Object attrs, charset_list, val;
4514   int ascii_compatible;
4515   struct charset *charset_roman, *charset_kanji, *charset_kana;
4516   struct charset *charset_kanji2;
4517   int c;
4518
       /* MULTIBYTEP and DST_END are never named in the statements below;
          presumably the ASSURE_DESTINATION and EMIT_xxx macros (defined
          outside this section) refer to them, and to DST and
          PRODUCED_CHARS, by name -- TODO confirm before renaming.  */
4519   CODING_GET_INFO (coding, attrs, charset_list);
       /* Walk the charset list: roman, kana, kanji, then an optional
          fourth entry.  CHARSET_ROMAN is assigned here but not used
          again in this function.  */
4520   val = charset_list;
4521   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4522   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4523   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4524   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4525
4526   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4527
4528   while (charbuf < charbuf_end)
4529     {
           /* SAFE_ROOM (4) is requested before every character; the
              longest sequence emitted below is two bytes.  */
4530       ASSURE_DESTINATION (safe_room);
4531       c = *charbuf++;
4532       /* Now encode the character C.  */
4533       if (ASCII_CHAR_P (c) && ascii_compatible)
4534         EMIT_ONE_ASCII_BYTE (c);
4535       else if (CHAR_BYTE8_P (c))
4536         {
               /* A raw-byte character is written back as its byte.  */
4537           c = CHAR_TO_BYTE8 (c);
4538           EMIT_ONE_BYTE (c);
4539         }
4540       else
4541         {
4542           unsigned code;
4543           struct charset *charset = char_charset (c, charset_list, &code);
4544
4545           if (!charset)
4546             {
                   /* C is in none of the listed charsets: substitute
                      either CODING_INHIBIT_CHARACTER_SUBSTITUTION (treated
                      as an ASCII code) or the coding system's default
                      character.  */
4547               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4548                 {
4549                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4550                   charset = CHARSET_FROM_ID (charset_ascii);
4551                 }
4552               else
4553                 {
4554                   c = coding->default_char;
                       /* NOTE(review): CHARSET is not re-checked after
                          this second lookup; this presumably relies on
                          default_char always being encodable -- confirm.  */
4555                   charset = char_charset (c, charset_list, &code);
4556                 }
4557             }
4558           if (code == CHARSET_INVALID_CODE (charset))
4559             abort ();
4560           if (charset == charset_kanji)
4561             {
4562               int c1, c2;
                   /* JIS_TO_SJIS presumably rewrites CODE in place; the
                      result is split into a high and a low byte.  */
4563               JIS_TO_SJIS (code);
4564               c1 = code >> 8, c2 = code & 0xFF;
4565               EMIT_TWO_BYTES (c1, c2);
4566             }
4567           else if (charset == charset_kana)
                 /* Kana are emitted as one byte with the high bit set.  */
4568             EMIT_ONE_BYTE (code | 0x80);
4569           else if (charset_kanji2 && charset == charset_kanji2)
4570             {
4571               int c1, c2;
4572
                   /* Only rows 0x21, 0x23-0x24, 0x2C-0x2F and 0x6E and up
                      are converted; any other row falls back to a single
                      7-bit byte.  NOTE(review): if this charset is JIS X
                      0213 plane 2, rows 0x25 and 0x28 are also defined
                      there -- confirm whether `< 0x25' is intended.  */
4573               c1 = code >> 8;
4574               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4575                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4576                 {
4577                   JIS_TO_SJIS2 (code);
4578                   c1 = code >> 8, c2 = code & 0xFF;
4579                   EMIT_TWO_BYTES (c1, c2);
4580                 }
4581               else
4582                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4583             }
4584           else
                 /* Any other charset (including the substitution case
                    above): emit the low seven bits of CODE.  */
4585             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4586         }
4587     }
4588   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4589   coding->produced_char += produced_chars;
4590   coding->produced = dst - coding->destination;
4591   return 0;
4592 }
4593
4594 static int
4595 encode_coding_big5 (coding)
4596      struct coding_system *coding;
4597 {
       /* Encode the characters of CODING->charbuf as BIG5 bytes and append
          them to CODING->destination.  The first two elements of the
          coding system's charset list are taken as the roman and the BIG5
          charsets.  Returns 0 on the path visible here.  */
4598   int multibytep = coding->dst_multibyte;
4599   int *charbuf = coding->charbuf;
4600   int *charbuf_end = charbuf + coding->charbuf_used;
4601   unsigned char *dst = coding->destination + coding->produced;
4602   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4603   int safe_room = 4;
4604   int produced_chars = 0;
4605   Lisp_Object attrs, charset_list, val;
4606   int ascii_compatible;
4607   struct charset *charset_roman, *charset_big5;
4608   int c;
4609
       /* MULTIBYTEP and DST_END are never named below; presumably the
          ASSURE_DESTINATION and EMIT_xxx macros (defined outside this
          section) use them by name -- TODO confirm before renaming.
          CHARSET_ROMAN is assigned but not used again.  */
4610   CODING_GET_INFO (coding, attrs, charset_list);
4611   val = charset_list;
4612   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4613   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4614   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4615
4616   while (charbuf < charbuf_end)
4617     {
4618       ASSURE_DESTINATION (safe_room);
4619       c = *charbuf++;
4620       /* Now encode the character C.  */
4621       if (ASCII_CHAR_P (c) && ascii_compatible)
4622         EMIT_ONE_ASCII_BYTE (c);
4623       else if (CHAR_BYTE8_P (c))
4624         {
               /* A raw-byte character is written back as its byte.  */
4625           c = CHAR_TO_BYTE8 (c);
4626           EMIT_ONE_BYTE (c);
4627         }
4628       else
4629         {
4630           unsigned code;
4631           struct charset *charset = char_charset (c, charset_list, &code);
4632
4633           if (! charset)
4634             {
                   /* C is in none of the listed charsets: substitute
                      either CODING_INHIBIT_CHARACTER_SUBSTITUTION (treated
                      as an ASCII code) or the default character.  */
4635               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4636                 {
4637                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4638                   charset = CHARSET_FROM_ID (charset_ascii);
4639                 }
4640               else
4641                 {
4642                   c = coding->default_char;
                       /* NOTE(review): CHARSET is not re-checked after
                          this second lookup; this presumably relies on
                          default_char always being encodable -- confirm.  */
4643                   charset = char_charset (c, charset_list, &code);
4644                 }
4645             }
4646           if (code == CHARSET_INVALID_CODE (charset))
4647             abort ();
4648           if (charset == charset_big5)
4649             {
4650               int c1, c2;
4651
                   /* BIG5 code points go out unchanged, high byte first.  */
4652               c1 = code >> 8, c2 = code & 0xFF;
4653               EMIT_TWO_BYTES (c1, c2);
4654             }
4655           else
                 /* Any other charset: emit the low seven bits of CODE.  */
4656             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4657         }
4658     }
4659   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4660   coding->produced_char += produced_chars;
4661   coding->produced = dst - coding->destination;
4662   return 0;
4663 }
4664
4665 \f
4666 /*** 9. CCL handlers ***/
4667
4668 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4669    Check if a text is encoded in a coding system of which
4670    encoder/decoder are written in CCL program.  If it is, return 1;
4671    else set CATEGORY_MASK_CCL in DETECT_INFO->rejected and return 0.  */
4672
4673 static int
4674 detect_coding_ccl (coding, detect_info)
4675      struct coding_system *coding;
4676      struct coding_detection_info *detect_info;
4677 {
4678   const unsigned char *src = coding->source, *src_base;
4679   const unsigned char *src_end = coding->source + coding->src_bytes;
4680   int multibytep = coding->src_multibyte;
4681   int consumed_chars = 0;
4682   int found = 0;
4683   unsigned char *valids;
4684   int head_ascii = coding->head_ascii;
4685   Lisp_Object attrs;
4686
4687   detect_info->checked |= CATEGORY_MASK_CCL;
4688
       /* From here on CODING is the CCL category's coding system; the
          source pointers and HEAD_ASCII above were already taken from the
          caller's CODING.  */
4689   coding = &coding_categories[coding_category_ccl];
4690   valids = CODING_CCL_VALIDS (coding);
4691   attrs = CODING_ID_ATTRS (coding->id);
       /* For an ASCII-compatible coding system, skip the first HEAD_ASCII
          bytes.  */
4692   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4693     src += head_ascii;
4694
       /* Scan the source byte by byte.  The loop has no exit of its own
          other than `break'; ONE_MORE_BYTE (defined outside this section)
          presumably jumps to `no_more_source' at the end of the input.  */
4695   while (1)
4696     {
4697       int c;
4698
4699       src_base = src;
4700       ONE_MORE_BYTE (c);
           /* A negative C or a byte whose VALIDS entry is zero rejects
              the category.  */
4701       if (c < 0 || ! valids[c])
4702         break;
           /* An entry greater than 1 marks the category as found.  */
4703       if ((valids[c] > 1))
4704         found = CATEGORY_MASK_CCL;
4705     }
4706   detect_info->rejected |= CATEGORY_MASK_CCL;
4707   return 0;
4708
4709  no_more_source:
4710   detect_info->found |= found;
4711   return 1;
4712 }
4713
4714 static void
4715 decode_coding_ccl (coding)
4716      struct coding_system *coding;
4717 {
       /* Decode the not-yet-consumed part of CODING->source by running the
          coding system's CCL decoder, storing the produced characters in
          CODING->charbuf.  The source is handed to ccl_driver in batches
          of at most 1024 elements held in SOURCE_CHARBUF.  On return
          CODING->consumed, CODING->consumed_char and CODING->charbuf_used
          are updated and a conversion result is recorded, except when the
          CCL program stopped for lack of destination room.  */
4718   const unsigned char *src = coding->source + coding->consumed;
4719   const unsigned char *src_end = coding->source + coding->src_bytes;
4720   int *charbuf = coding->charbuf + coding->charbuf_used;
4721   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4722   int consumed_chars = 0;
4723   int multibytep = coding->src_multibyte;
4724   struct ccl_program ccl;
4725   int source_charbuf[1024];
       /* For a multibyte source only: byte offset from SRC of each element
          of SOURCE_CHARBUF.  Not filled for a unibyte source, where
          element N is simply byte N.  */
4726   int source_byteidx[1024];
4727   Lisp_Object attrs, charset_list;
4728
4729   CODING_GET_INFO (coding, attrs, charset_list);
4730   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4731
4732   while (src < src_end)
4733     {
4734       const unsigned char *p = src;
4735       int *source, *source_end;
4736       int i = 0;
4737
           /* Fill one batch: characters for a multibyte source, raw bytes
              otherwise.  */
4738       if (multibytep)
4739         while (i < 1024 && p < src_end)
4740           {
4741             source_byteidx[i] = p - src;
4742             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4743           }
4744       else
4745         while (i < 1024 && p < src_end)
4746           source_charbuf[i++] = *p++;
4747
4748       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4749         ccl.last_block = 1;
4750
4751       source = source_charbuf;
4752       source_end = source + i;
           /* Run the program over the batch; it is re-entered only while
              it reports CCL_STAT_SUSPEND_BY_DST.  */
4753       while (source < source_end)
4754         {
4755           ccl_driver (&ccl, source, charbuf,
4756                       source_end - source, charbuf_end - charbuf,
4757                       charset_list);
4758           source += ccl.consumed;
4759           charbuf += ccl.produced;
4760           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4761             break;
4762         }
           /* Advance SRC past what the program consumed.  When it stopped
              inside the batch, translate the element index into a byte
              offset.  SOURCE_BYTEIDX is valid only for a multibyte
              source; for a unibyte source the element index is the byte
              offset, so never read the unfilled table there.  */
4763       if (source < source_end)
4764         src += (multibytep ? source_byteidx[source - source_charbuf]
                     : source - source_charbuf);
4765       else
4766         src = p;
4767       consumed_chars += source - source_charbuf;
4768
           /* NOTE(review): the second operand compares a CCL status with a
              CODING_RESULT_ constant -- confirm this is intended.  */
4769       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4770           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4771         break;
4772     }
4773
4774   switch (ccl.status)
4775     {
4776     case CCL_STAT_SUSPEND_BY_SRC:
4777       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4778       break;
4779     case CCL_STAT_SUSPEND_BY_DST:
           /* A full charbuf records no result here.  */
4780       break;
4781     case CCL_STAT_QUIT:
4782     case CCL_STAT_INVALID_CMD:
4783       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4784       break;
4785     default:
4786       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4787       break;
4788     }
4789   coding->consumed_char += consumed_chars;
4790   coding->consumed = src - coding->source;
4791   coding->charbuf_used = charbuf - coding->charbuf;
4792 }
4793
4794 static int
4795 encode_coding_ccl (coding)
4796      struct coding_system *coding;
4797 {
       /* Encode the characters of CODING->charbuf by running the coding
          system's CCL encoder and append the resulting bytes to
          CODING->destination.  Each ccl_driver call writes at most 1024
          values into DESTINATION_CHARBUF; only the low eight bits of each
          value are emitted.  Returns 0 on the path visible here.  */
4798   struct ccl_program ccl;
4799   int multibytep = coding->dst_multibyte;
4800   int *charbuf = coding->charbuf;
4801   int *charbuf_end = charbuf + coding->charbuf_used;
4802   unsigned char *dst = coding->destination + coding->produced;
4803   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4804   int destination_charbuf[1024];
4805   int i, produced_chars = 0;
4806   Lisp_Object attrs, charset_list;
4807
4808   CODING_GET_INFO (coding, attrs, charset_list);
4809   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4810
4811   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4812   ccl.dst_multibyte = coding->dst_multibyte;
4813
4814   while (charbuf < charbuf_end)
4815     {
4816       ccl_driver (&ccl, charbuf, destination_charbuf,
4817                   charbuf_end - charbuf, 1024, charset_list);
4818       if (multibytep)
4819         {
               /* Twice the produced count is reserved; presumably
                  EMIT_ONE_BYTE may write two bytes per value into a
                  multibyte destination -- TODO confirm.  */
4820           ASSURE_DESTINATION (ccl.produced * 2);
4821           for (i = 0; i < ccl.produced; i++)
4822             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4823         }
4824       else
4825         {
               /* Unibyte destination: one byte per produced value,
                  counted directly in PRODUCED_CHARS.  */
4826           ASSURE_DESTINATION (ccl.produced);
4827           for (i = 0; i < ccl.produced; i++)
4828             *dst++ = destination_charbuf[i] & 0xFF;
4829           produced_chars += ccl.produced;
4830         }
4831       charbuf += ccl.consumed;
           /* Only a quit or an invalid command ends the loop early; any
              other status runs the program again on the rest.  */
4832       if (ccl.status == CCL_STAT_QUIT
4833           || ccl.status == CCL_STAT_INVALID_CMD)
4834         break;
4835     }
4836
       /* Translate the final CCL status into a conversion result.  */
4837   switch (ccl.status)
4838     {
4839     case CCL_STAT_SUSPEND_BY_SRC:
4840       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4841       break;
4842     case CCL_STAT_SUSPEND_BY_DST:
4843       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4844       break;
4845     case CCL_STAT_QUIT:
4846     case CCL_STAT_INVALID_CMD:
4847       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4848       break;
4849     default:
4850       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4851       break;
4852     }
4853
4854   coding->produced_char += produced_chars;
4855   coding->produced = dst - coding->destination;
4856   return 0;
4857 }
4858
4859
4860 \f
4861 /*** 10, 11. no-conversion handlers ***/
4862
4863 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Nothing is copied into the charbuf: CODING->chars_at_source is set
        and the whole source is marked as consumed.  The one exception is
        a CR ending the source of a coding system with DOS end-of-line:
        it is left unconsumed and CODING_RESULT_INSUFFICIENT_SRC is
        recorded.  */
4864
4865 static void
4866 decode_coding_raw_text (coding)
4867      struct coding_system *coding;
4868 {
4869   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4870
4871   coding->chars_at_source = 1;
4872   coding->consumed_char = coding->src_chars;
4873   coding->consumed = coding->src_bytes;
       /* Look at the last source byte only when there is one; with an
          empty source the index SRC_BYTES - 1 would be -1.  */
4874   if (eol_crlf && coding->src_bytes > 0
           && coding->source[coding->src_bytes - 1] == '\r')
4875     {
4876       coding->consumed_char--;
4877       coding->consumed--;
4878       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4879     }
4880   else
4881     record_conversion_result (coding, CODING_RESULT_SUCCESS);
4882 }
4883
4884 static int
4885 encode_coding_raw_text (coding)
4886      struct coding_system *coding;
4887 {
       /* Copy the contents of CODING->charbuf to CODING->destination
          without charset conversion.  There are four cases, selected by
          whether the destination (CODING->dst_multibyte) and the source
          (CODING->src_multibyte) are multibyte.  Returns 0 on the path
          visible here.  */
4888   int multibytep = coding->dst_multibyte;
4889   int *charbuf = coding->charbuf;
4890   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4891   unsigned char *dst = coding->destination + coding->produced;
4892   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4893   int produced_chars = 0;
4894   int c;
4895
4896   if (multibytep)
4897     {
4898       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4899
4900       if (coding->src_multibyte)
             /* Multibyte to multibyte: ASCII and raw-byte characters are
                emitted as one byte; any other character is expanded by
                CHAR_STRING_ADVANCE and each resulting byte is emitted
                separately.  */
4901         while (charbuf < charbuf_end)
4902           {
4903             ASSURE_DESTINATION (safe_room);
4904             c = *charbuf++;
4905             if (ASCII_CHAR_P (c))
4906               EMIT_ONE_ASCII_BYTE (c);
4907             else if (CHAR_BYTE8_P (c))
4908               {
4909                 c = CHAR_TO_BYTE8 (c);
4910                 EMIT_ONE_BYTE (c);
4911               }
4912             else
4913               {
4914                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4915
4916                 CHAR_STRING_ADVANCE (c, p1);
4917                 while (p0 < p1)
4918                   {
4919                     EMIT_ONE_BYTE (*p0);
4920                     p0++;
4921                   }
4922               }
4923           }
4924       else
             /* Unibyte to multibyte: every element is emitted as a byte.  */
4925         while (charbuf < charbuf_end)
4926           {
4927             ASSURE_DESTINATION (safe_room);
4928             c = *charbuf++;
4929             EMIT_ONE_BYTE (c);
4930           }
4931     }
4932   else
4933     {
4934       if (coding->src_multibyte)
4935         {
4936           int safe_room = MAX_MULTIBYTE_LENGTH;
4937
               /* Multibyte to unibyte: bytes are stored directly; a
                  character that is neither ASCII nor a raw byte is stored
                  in the form CHAR_STRING_ADVANCE writes.  */
4938           while (charbuf < charbuf_end)
4939             {
4940               ASSURE_DESTINATION (safe_room);
4941               c = *charbuf++;
4942               if (ASCII_CHAR_P (c))
4943                 *dst++ = c;
4944               else if (CHAR_BYTE8_P (c))
4945                 *dst++ = CHAR_TO_BYTE8 (c);
4946               else
4947                 CHAR_STRING_ADVANCE (c, dst);
4948             }
4949         }
4950       else
4951         {
               /* Unibyte to unibyte: room for the whole charbuf is
                  requested once, then elements are copied one to one;
                  the copy is additionally bounded by DST_END.  */
4952           ASSURE_DESTINATION (charbuf_end - charbuf);
4953           while (charbuf < charbuf_end && dst < dst_end)
4954             *dst++ = *charbuf++;
4955         }
           /* For a unibyte destination the character count is the number
              of bytes written by this call.  */
4956       produced_chars = dst - (coding->destination + coding->produced);
4957     }
4958   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4959   coding->produced_char += produced_chars;
4960   coding->produced = dst - coding->destination;
4961   return 0;
4962 }
4963
4964 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4965    Check if a text is encoded in a charset-based coding system.  If it
4966    is, return 1, else return 0.  */
4967
4968 static int
4969 detect_coding_charset (coding, detect_info)
4970      struct coding_system *coding;
4971      struct coding_detection_info *detect_info;
4972 {
4973   const unsigned char *src = coding->source, *src_base;
4974   const unsigned char *src_end = coding->source + coding->src_bytes;
4975   int multibytep = coding->src_multibyte;
4976   int consumed_chars = 0;
4977   Lisp_Object attrs, valids;
4978   int found = 0;
4979   int head_ascii = coding->head_ascii;
4980
4981   detect_info->checked |= CATEGORY_MASK_CHARSET;
4982
       /* From here on CODING is the charset category's coding system; the
          source pointers and HEAD_ASCII above were already taken from the
          caller's CODING.  */
4983   coding = &coding_categories[coding_category_charset];
4984   attrs = CODING_ID_ATTRS (coding->id);
4985   valids = AREF (attrs, coding_attr_charset_valids);
4986
4987   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4988     src += head_ascii;
4989
       /* Scan the source.  VALIDS maps a leading byte to nil (reject), to
          one charset id, or to a list of candidate charset ids.
          ONE_MORE_BYTE (defined outside this section) presumably jumps to
          `no_more_source' at the end of the input.  */
4990   while (1)
4991     {
4992       int c;
4993       Lisp_Object val;
4994       struct charset *charset;
4995       int dim, idx;
4996
4997       src_base = src;
4998       ONE_MORE_BYTE (c);
4999       if (c < 0)
5000         continue;
5001       val = AREF (valids, c);
5002       if (NILP (val))
5003         break;
5004       if (c >= 0x80)
5005         found = CATEGORY_MASK_CHARSET;
5006       if (INTEGERP (val))
5007         {
5008           charset = CHARSET_FROM_ID (XFASTINT (val));
5009           dim = CHARSET_DIMENSION (charset);
               /* Check each trailing byte against the byte range of its
                  dimension.  code_space is indexed with a stride of 4 per
                  dimension, exactly as in the list branch below; a stride
                  of 2 would pick the wrong slots for charsets of
                  dimension 3 or more.  */
5010           for (idx = 1; idx < dim; idx++)
5011             {
5012               if (src == src_end)
5013                 goto too_short;
5014               ONE_MORE_BYTE (c);
5015               if (c < charset->code_space[(dim - 1 - idx) * 4]
5016                   || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5017                 break;
5018             }
5019           if (idx < dim)
5020             break;
5021         }
5022       else
5023         {
               /* Try the candidate charsets in list order.  IDX is not
                  reset between candidates, so trailing bytes already
                  accepted for one candidate are not read again for the
                  next.  */
5024           idx = 1;
5025           for (; CONSP (val); val = XCDR (val))
5026             {
5027               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5028               dim = CHARSET_DIMENSION (charset);
5029               while (idx < dim)
5030                 {
5031                   if (src == src_end)
5032                     goto too_short;
5033                   ONE_MORE_BYTE (c);
5034                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5035                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5036                     break;
5037                   idx++;
5038                 }
5039               if (idx == dim)
5040                 {
                       /* A candidate matched; nil tells the test after
                          the loop that the scan may go on.  */
5041                   val = Qnil;
5042                   break;
5043                 }
5044             }
5045           if (CONSP (val))
5046             break;
5047         }
5048     }
5049  too_short:
5050   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5051   return 0;
5052
5053  no_more_source:
5054   detect_info->found |= found;
5055   return 1;
5056 }
5057
5058 static void
5059 decode_coding_charset (coding)
5060      struct coding_system *coding;
5061 {
       /* Decode the not-yet-consumed part of CODING->source according to
          the coding system's charset table (coding_attr_charset_valids)
          and store the characters in CODING->charbuf.  A byte sequence
          that cannot be decoded is passed through one byte at a time and
          counted in CODING->errors.  Runs of characters from a non-ASCII
          charset are annotated with ADD_CHARSET_DATA.  On return
          CODING->consumed, CODING->consumed_char and CODING->charbuf_used
          describe exactly what was decoded.  */
5062   const unsigned char *src = coding->source + coding->consumed;
5063   const unsigned char *src_end = coding->source + coding->src_bytes;
5064   const unsigned char *src_base;
5065   int *charbuf = coding->charbuf + coding->charbuf_used;
5066   int *charbuf_end
5067     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
5068   int consumed_chars = 0, consumed_chars_base;
5069   int multibytep = coding->src_multibyte;
5070   Lisp_Object attrs, charset_list, valids;
5071   int char_offset = coding->produced_char;
5072   int last_offset = char_offset;
5073   int last_id = charset_ascii;
5074   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
       /* With DOS end-of-line, the byte following a CR is read ahead and
          kept here (-1 when nothing is pending).  NOTE(review): a
          negative value read ahead cannot be told apart from "nothing
          pending" -- confirm whether that can occur.  */
5075   int byte_after_cr = -1;
       /* Source bytes and characters that reading BYTE_AFTER_CR consumed;
          meaningful only while BYTE_AFTER_CR >= 0.  */
       int bytes_after_cr = 0, chars_after_cr = 0;
5076
5077   CODING_GET_INFO (coding, attrs, charset_list);
5078   valids = AREF (attrs, coding_attr_charset_valids);
5079
5080   while (1)
5081     {
5082       int c;
5083       Lisp_Object val;
5084       struct charset *charset;
5085       int dim;
5086       int len = 1;
5087       unsigned code;
5088
5089       src_base = src;
5090       consumed_chars_base = consumed_chars;
           /* A pending look-ahead byte has been read but not decoded, so
              the unit about to be decoded starts at that byte.  Keeping
              the bases there makes the three exits below (`break',
              `invalid_code', `no_more_source') report or re-read it
              instead of silently dropping it.  */
           if (byte_after_cr >= 0)
             {
               src_base -= bytes_after_cr;
               consumed_chars_base -= chars_after_cr;
             }
5091
5092       if (charbuf >= charbuf_end)
5093         break;
5094
5095       if (byte_after_cr >= 0)
5096         {
5097           c = byte_after_cr;
5098           byte_after_cr = -1;
5099         }
5100       else
5101         {
5102           ONE_MORE_BYTE (c);
5103           if (eol_crlf && c == '\r')
                 {
                   const unsigned char *before = src;
                   int chars_before = consumed_chars;

5104               ONE_MORE_BYTE (byte_after_cr);
                   bytes_after_cr = src - before;
                   chars_after_cr = consumed_chars - chars_before;
                 }
5105         }
5106       if (c < 0)
5107         goto invalid_code;
5108       code = c;
5109
5110       val = AREF (valids, c);
5111       if (NILP (val))
5112         goto invalid_code;
5113       if (INTEGERP (val))
5114         {
               /* One charset for this leading byte: gather the remaining
                  bytes of the code point and decode it.  */
5115           charset = CHARSET_FROM_ID (XFASTINT (val));
5116           dim = CHARSET_DIMENSION (charset);
5117           while (len < dim)
5118             {
5119               ONE_MORE_BYTE (c);
5120               code = (code << 8) | c;
5121               len++;
5122             }
5123           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5124                               charset, code, c);
5125         }
5126       else
5127         {
5128           /* VAL is a list of charset IDs.  It is assured that the
5129              list is sorted by charset dimensions (smaller one
5130              comes first).  */
5131           while (CONSP (val))
5132             {
5133               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5134               dim = CHARSET_DIMENSION (charset);
5135               while (len < dim)
5136                 {
5137                   ONE_MORE_BYTE (c);
5138                   code = (code << 8) | c;
5139                   len++;
5140                 }
5141               CODING_DECODE_CHAR (coding, src, src_base,
5142                                   src_end, charset, code, c);
5143               if (c >= 0)
5144                 break;
5145               val = XCDR (val);
5146             }
5147         }
5148       if (c < 0)
5149         goto invalid_code;
           /* Entering a run of a different non-ASCII charset: close the
              previous run's annotation, if any, and start a new run.  */
5150       if (charset->id != charset_ascii
5151           && last_id != charset->id)
5152         {
5153           if (last_id != charset_ascii)
5154             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5155           last_id = charset->id;
5156           last_offset = char_offset;
5157         }
5158
5159       *charbuf++ = c;
5160       char_offset++;
5161       continue;
5162
5163     invalid_code:
           /* Go back to the start of the failed unit and pass its first
              byte through.  Everything after that byte is unread again,
              so a look-ahead byte still pending from that unit must be
              discarded; otherwise it would be decoded twice.  */
5164       src = src_base;
5165       consumed_chars = consumed_chars_base;
           byte_after_cr = -1;
5166       ONE_MORE_BYTE (c);
5167       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5168       char_offset++;
5169       coding->errors++;
5170     }
5171
5172  no_more_source:
5173   if (last_id != charset_ascii)
5174     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5175   coding->consumed_char += consumed_chars_base;
5176   coding->consumed = src_base - coding->source;
5177   coding->charbuf_used = charbuf - coding->charbuf;
5178 }
5179
5180 static int
5181 encode_coding_charset (coding)
5182      struct coding_system *coding;
5183 {
       /* Encode the characters of CODING->charbuf using the charsets in
          the coding system's charset list and append the bytes to
          CODING->destination.  A character's code point is emitted in as
          many bytes as its charset has dimensions, most significant byte
          first.  Returns 0 on the path visible here.  */
5184   int multibytep = coding->dst_multibyte;
5185   int *charbuf = coding->charbuf;
5186   int *charbuf_end = charbuf + coding->charbuf_used;
5187   unsigned char *dst = coding->destination + coding->produced;
5188   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5189   int safe_room = MAX_MULTIBYTE_LENGTH;
5190   int produced_chars = 0;
5191   Lisp_Object attrs, charset_list;
5192   int ascii_compatible;
5193   int c;
5194
       /* MULTIBYTEP and DST_END are never named below; presumably the
          ASSURE_DESTINATION and EMIT_xxx macros (defined outside this
          section) use them by name -- TODO confirm before renaming.  */
5195   CODING_GET_INFO (coding, attrs, charset_list);
5196   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5197
5198   while (charbuf < charbuf_end)
5199     {
5200       struct charset *charset;
5201       unsigned code;
5202
5203       ASSURE_DESTINATION (safe_room);
5204       c = *charbuf++;
5205       if (ascii_compatible && ASCII_CHAR_P (c))
5206         EMIT_ONE_ASCII_BYTE (c);
5207       else if (CHAR_BYTE8_P (c))
5208         {
               /* A raw-byte character is written back as its byte.  */
5209           c = CHAR_TO_BYTE8 (c);
5210           EMIT_ONE_BYTE (c);
5211         }
5212       else
5213         {
5214           charset = char_charset (c, charset_list, &code);
5215           if (charset)
5216             {
5217               if (CHARSET_DIMENSION (charset) == 1)
5218                 EMIT_ONE_BYTE (code);
5219               else if (CHARSET_DIMENSION (charset) == 2)
5220                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5221               else if (CHARSET_DIMENSION (charset) == 3)
5222                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5223               else
5224                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5225                                  (code >> 8) & 0xFF, code & 0xFF);
5226             }
5227           else
5228             {
                   /* C is in none of the listed charsets: emit one
                      substitute byte.  NOTE(review): default_char is
                      emitted directly as a byte, without being encoded
                      through a charset -- confirm it always fits.  */
5229               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5230                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5231               else
5232                 c = coding->default_char;
5233               EMIT_ONE_BYTE (c);
5234             }
5235         }
5236     }
5237
5238   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5239   coding->produced_char += produced_chars;
5240   coding->produced = dst - coding->destination;
5241   return 0;
5242 }
5243
5244 \f
5245 /*** 10. C library functions ***/
5246
5247 /* Setup coding context CODING from information about CODING_SYSTEM.
5248    If CODING_SYSTEM is nil, `undecided' is assumed.  If
5249    CODING_SYSTEM is invalid, signal an error.  */
5250
5251 void
5252 setup_coding_system (coding_system, coding)
5253      Lisp_Object coding_system;
5254      struct coding_system *coding;
5255 {
5256   Lisp_Object attrs;
5257   Lisp_Object eol_type;
5258   Lisp_Object coding_type;
5259   Lisp_Object val;
5260
5261   if (NILP (coding_system))
5262     coding_system = Qundecided;
5263
5264   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5265
5266   attrs = CODING_ID_ATTRS (coding->id);
5267   eol_type = CODING_ID_EOL_TYPE (coding->id);
5268
5269   coding->mode = 0;
5270   coding->head_ascii = -1;
       /* Initial common flags depend on the end-of-line type: a vector
          (presumably meaning the eol format is still to be detected)
          requires decoding and detection; anything other than `unix'
          requires decoding and encoding.  */
5271   if (VECTORP (eol_type))
5272     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5273                             | CODING_REQUIRE_DETECTION_MASK);
5274   else if (! EQ (eol_type, Qunix))
5275     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5276                             | CODING_REQUIRE_ENCODING_MASK);
5277   else
5278     coding->common_flags = 0;
       /* A post-read or pre-write attribute forces decoding or encoding
          respectively.  */
5279   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5280     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5281   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5282     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5283   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5284     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5285
       /* The safe-charsets attribute is a string; CODING keeps a pointer
          to its data and the index of its last element.  */
5286   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5287   coding->max_charset_id = SCHARS (val) - 1;
5288   coding->safe_charsets = (char *) SDATA (val);
5289   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5290
       /* Install the detector, decoder and encoder for the coding type
          and add the flags that type needs.  */
5291   coding_type = CODING_ATTR_TYPE (attrs);
5292   if (EQ (coding_type, Qundecided))
5293     {
           /* `undecided' uses the raw-text decoder and encoder and always
              asks for detection.  */
5294       coding->detector = NULL;
5295       coding->decoder = decode_coding_raw_text;
5296       coding->encoder = encode_coding_raw_text;
5297       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5298     }
5299   else if (EQ (coding_type, Qiso_2022))
5300     {
5301       int i;
5302       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5303
5304       /* Invoke graphic register 0 to plane 0.  */
5305       CODING_ISO_INVOCATION (coding, 0) = 0;
5306       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5307       CODING_ISO_INVOCATION (coding, 1)
5308         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5309       /* Setup the initial status of designation.  */
5310       for (i = 0; i < 4; i++)
5311         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5312       /* Not single shifting initially.  */
5313       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5314       /* Beginning of buffer should also be regarded as bol. */
5315       CODING_ISO_BOL (coding) = 1;
5316       coding->detector = detect_coding_iso_2022;
5317       coding->decoder = decode_coding_iso_2022;
5318       coding->encoder = encode_coding_iso_2022;
5319       if (flags & CODING_ISO_FLAG_SAFE)
5320         coding->mode |= CODING_MODE_SAFE_ENCODING;
5321       coding->common_flags
5322         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5323             | CODING_REQUIRE_FLUSHING_MASK);
5324       if (flags & CODING_ISO_FLAG_COMPOSITION)
5325         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5326       if (flags & CODING_ISO_FLAG_DESIGNATION)
5327         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5328       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5329         {
               /* The safe-charsets attribute is re-read after
                  setup_iso_safe_charsets, which presumably rebuilds it,
                  replacing the values stored above.  */
5330           setup_iso_safe_charsets (attrs);
5331           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5332           coding->max_charset_id = SCHARS (val) - 1;
5333           coding->safe_charsets = (char *) SDATA (val);
5334         }
5335       CODING_ISO_FLAGS (coding) = flags;
5336     }
5337   else if (EQ (coding_type, Qcharset))
5338     {
5339       coding->detector = detect_coding_charset;
5340       coding->decoder = decode_coding_charset;
5341       coding->encoder = encode_coding_charset;
5342       coding->common_flags
5343         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5344     }
5345   else if (EQ (coding_type, Qutf_8))
5346     {
           /* BOM handling: a cons means detect, t means with BOM,
              anything else means without BOM.  */
5347       val = AREF (attrs, coding_attr_utf_bom);
5348       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5349                                    : EQ (val, Qt) ? utf_with_bom
5350                                    : utf_without_bom);
5351       coding->detector = detect_coding_utf_8;
5352       coding->decoder = decode_coding_utf_8;
5353       coding->encoder = encode_coding_utf_8;
5354       coding->common_flags
5355         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5356       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5357         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5358     }
5359   else if (EQ (coding_type, Qutf_16))
5360     {
           /* Same BOM rule as for UTF-8; the endianness is big only when
              the attribute is `big'.  */
5361       val = AREF (attrs, coding_attr_utf_bom);
5362       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5363                                     : EQ (val, Qt) ? utf_with_bom
5364                                     : utf_without_bom);
5365       val = AREF (attrs, coding_attr_utf_16_endian);
5366       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5367                                        : utf_16_little_endian);
5368       CODING_UTF_16_SURROGATE (coding) = 0;
5369       coding->detector = detect_coding_utf_16;
5370       coding->decoder = decode_coding_utf_16;
5371       coding->encoder = encode_coding_utf_16;
5372       coding->common_flags
5373         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5374       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5375         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5376     }
5377   else if (EQ (coding_type, Qccl))
5378     {
5379       coding->detector = detect_coding_ccl;
5380       coding->decoder = decode_coding_ccl;
5381       coding->encoder = encode_coding_ccl;
5382       coding->common_flags
5383         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5384             | CODING_REQUIRE_FLUSHING_MASK);
5385     }
5386   else if (EQ (coding_type, Qemacs_mule))
5387     {
5388       coding->detector = detect_coding_emacs_mule;
5389       coding->decoder = decode_coding_emacs_mule;
5390       coding->encoder = encode_coding_emacs_mule;
5391       coding->common_flags
5392         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5393       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5394           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5395         {
5396           Lisp_Object tail, safe_charsets;
5397           int max_charset_id = 0;
5398
               /* Build a fresh safe-charsets string: one element per
                  charset id up to the largest id in
                  Vemacs_mule_charset_list, all 255 except the listed
                  charsets, which are set to 0.  NOTE(review): the new
                  string is referenced below only through a raw data
                  pointer -- confirm it stays protected from GC.  */
5399           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5400                tail = XCDR (tail))
5401             if (max_charset_id < XFASTINT (XCAR (tail)))
5402               max_charset_id = XFASTINT (XCAR (tail));
5403           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5404                                         make_number (255));
5405           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5406                tail = XCDR (tail))
5407             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5408           coding->max_charset_id = max_charset_id;
5409           coding->safe_charsets = (char *) SDATA (safe_charsets);
5410         }
5411     }
5412   else if (EQ (coding_type, Qshift_jis))
5413     {
5414       coding->detector = detect_coding_sjis;
5415       coding->decoder = decode_coding_sjis;
5416       coding->encoder = encode_coding_sjis;
5417       coding->common_flags
5418         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5419     }
5420   else if (EQ (coding_type, Qbig5))
5421     {
5422       coding->detector = detect_coding_big5;
5423       coding->decoder = decode_coding_big5;
5424       coding->encoder = encode_coding_big5;
5425       coding->common_flags
5426         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5427     }
5428   else                          /* EQ (coding_type, Qraw_text) */
5429     {
5430       coding->detector = NULL;
5431       coding->decoder = decode_coding_raw_text;
5432       coding->encoder = encode_coding_raw_text;
           /* Raw text needs conversion only for end-of-line handling:
              decoding unless the eol type is `unix', and encoding as
              well when the eol type is not a vector.  */
5433       if (! EQ (eol_type, Qunix))
5434         {
5435           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5436           if (! VECTORP (eol_type))
5437             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5438         }
5439
5440     }
5441
5442   return;
5443 }
5444
5445 /* Return a list of charsets supported by CODING.  */
5446
5447 Lisp_Object
5448 coding_charset_list (coding)
5449      struct coding_system *coding;
5450 {
5451   Lisp_Object attrs, charset_list;
5452
5453   CODING_GET_INFO (coding, attrs, charset_list);
5454   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5455     {
5456       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5457
5458       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5459         charset_list = Viso_2022_charset_list;
5460     }
5461   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5462     {
5463       charset_list = Vemacs_mule_charset_list;
5464     }
5465   return charset_list;
5466 }
5467
5468
5469 /* Return raw-text or one of its subsidiaries that has the same
5470    eol_type as CODING-SYSTEM.  */
5471
5472 Lisp_Object
5473 raw_text_coding_system (coding_system)
5474      Lisp_Object coding_system;
5475 {
5476   Lisp_Object spec, attrs;
5477   Lisp_Object eol_type, raw_text_eol_type;
5478
5479   if (NILP (coding_system))
5480     return Qraw_text;
5481   spec = CODING_SYSTEM_SPEC (coding_system);
5482   attrs = AREF (spec, 0);
5483
5484   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5485     return coding_system;
5486
5487   eol_type = AREF (spec, 2);
5488   if (VECTORP (eol_type))
5489     return Qraw_text;
5490   spec = CODING_SYSTEM_SPEC (Qraw_text);
5491   raw_text_eol_type = AREF (spec, 2);
5492   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5493           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5494           : AREF (raw_text_eol_type, 2));
5495 }
5496
5497
5498 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5499    does, return one of the subsidiary that has the same eol-spec as
5500    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
5501    inherit end-of-line format from the system's setting
5502    (system_eol_type).  */
5503
5504 Lisp_Object
5505 coding_inherit_eol_type (coding_system, parent)
5506      Lisp_Object coding_system, parent;
5507 {
5508   Lisp_Object spec, eol_type;
5509
5510   if (NILP (coding_system))
5511     coding_system = Qraw_text;
5512   spec = CODING_SYSTEM_SPEC (coding_system);
5513   eol_type = AREF (spec, 2);
5514   if (VECTORP (eol_type))
5515     {
5516       Lisp_Object parent_eol_type;
5517
5518       if (! NILP (parent))
5519         {
5520           Lisp_Object parent_spec;
5521
5522           parent_spec = CODING_SYSTEM_SPEC (parent);
5523           parent_eol_type = AREF (parent_spec, 2);
5524         }
5525       else
5526         parent_eol_type = system_eol_type;
5527       if (EQ (parent_eol_type, Qunix))
5528         coding_system = AREF (eol_type, 0);
5529       else if (EQ (parent_eol_type, Qdos))
5530         coding_system = AREF (eol_type, 1);
5531       else if (EQ (parent_eol_type, Qmac))
5532         coding_system = AREF (eol_type, 2);
5533     }
5534   return coding_system;
5535 }
5536
5537 /* Emacs has a mechanism to automatically detect a coding system if it
5538    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5539    it's impossible to distinguish some coding systems accurately
5540    because they use the same range of codes.  So, at first, coding
5541    systems are categorized into the following categories:
5542
5543    o coding-category-emacs-mule
5544
5545         The category for a coding system which has the same code range
5546         as Emacs' internal format.  Assigned the coding-system (Lisp
5547         symbol) `emacs-mule' by default.
5548
5549    o coding-category-sjis
5550
5551         The category for a coding system which has the same code range
5552         as SJIS.  Assigned the coding-system (Lisp
5553         symbol) `japanese-shift-jis' by default.
5554
5555    o coding-category-iso-7
5556
5557         The category for a coding system which has the same code range
5558         as ISO2022 of 7-bit environment.  This doesn't use any locking
5559         shift and single shift functions.  This can encode/decode all
5560         charsets.  Assigned the coding-system (Lisp symbol)
5561         `iso-2022-7bit' by default.
5562
5563    o coding-category-iso-7-tight
5564
5565         Same as coding-category-iso-7 except that this can
5566         encode/decode only the specified charsets.
5567
5568    o coding-category-iso-8-1
5569
5570         The category for a coding system which has the same code range
5571         as ISO2022 of 8-bit environment and graphic plane 1 used only
5572         for DIMENSION1 charset.  This doesn't use any locking shift
5573         and single shift functions.  Assigned the coding-system (Lisp
5574         symbol) `iso-latin-1' by default.
5575
5576    o coding-category-iso-8-2
5577
5578         The category for a coding system which has the same code range
5579         as ISO2022 of 8-bit environment and graphic plane 1 used only
5580         for DIMENSION2 charset.  This doesn't use any locking shift
5581         and single shift functions.  Assigned the coding-system (Lisp
5582         symbol) `japanese-iso-8bit' by default.
5583
5584    o coding-category-iso-7-else
5585
5586         The category for a coding system which has the same code range
5587         as ISO2022 of 7-bit environment but uses locking shift or
5588         single shift functions.  Assigned the coding-system (Lisp
5589         symbol) `iso-2022-7bit-lock' by default.
5590
5591    o coding-category-iso-8-else
5592
5593         The category for a coding system which has the same code range
5594         as ISO2022 of 8-bit environment but uses locking shift or
5595         single shift functions.  Assigned the coding-system (Lisp
5596         symbol) `iso-2022-8bit-ss2' by default.
5597
5598    o coding-category-big5
5599
5600         The category for a coding system which has the same code range
5601         as BIG5.  Assigned the coding-system (Lisp symbol)
5602         `cn-big5' by default.
5603
5604    o coding-category-utf-8
5605
5606         The category for a coding system which has the same code range
5607         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5608         symbol) `utf-8' by default.
5609
5610    o coding-category-utf-16-be
5611
5612         The category for a coding system in which a text has an
5613         Unicode signature (cf. Unicode Standard) in the order of BIG
5614         endian at the head.  Assigned the coding-system (Lisp symbol)
5615         `utf-16-be' by default.
5616
5617    o coding-category-utf-16-le
5618
5619         The category for a coding system in which a text has an
5620         Unicode signature (cf. Unicode Standard) in the order of
5621         LITTLE endian at the head.  Assigned the coding-system (Lisp
5622         symbol) `utf-16-le' by default.
5623
5624    o coding-category-ccl
5625
5626         The category for a coding system of which encoder/decoder is
5627         written in CCL programs.  The default value is nil, i.e., no
5628         coding system is assigned.
5629
5630    o coding-category-binary
5631
5632         The category for a coding system not categorized in any of the
5633         above.  Assigned the coding-system (Lisp symbol)
5634         `no-conversion' by default.
5635
5636    Each of them is a Lisp symbol and the value is an actual
5637    `coding-system' (this is also a Lisp symbol) assigned by a user.
5638    What Emacs does actually is to detect a category of coding system.
5639    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5640    decide only one possible category, it selects a category of the
5641    highest priority.  Priorities of categories are also specified by a
5642    user in a Lisp variable `coding-category-list'.
5643
5644 */
5645
5646 #define EOL_SEEN_NONE   0
5647 #define EOL_SEEN_LF     1
5648 #define EOL_SEEN_CR     2
5649 #define EOL_SEEN_CRLF   4
5650
5651 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5652    SOURCE is encoded.  If CATEGORY is one of
5653    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5654    two-byte, else they are encoded by one-byte.
5655
5656    Return one of EOL_SEEN_XXX.  */
5657
5658 #define MAX_EOL_CHECK_COUNT 3
5659
5660 static int
5661 detect_eol (source, src_bytes, category)
5662      const unsigned char *source;
5663      EMACS_INT src_bytes;
5664      enum coding_category category;
5665 {
5666   const unsigned char *src = source, *src_end = src + src_bytes;
5667   unsigned char c;
5668   int total  = 0;
5669   int eol_seen = EOL_SEEN_NONE;
5670
5671   if ((1 << category) & CATEGORY_MASK_UTF_16)
5672     {
5673       int msb, lsb;
5674
5675       msb = category == (coding_category_utf_16_le
5676                          | coding_category_utf_16_le_nosig);
5677       lsb = 1 - msb;
5678
5679       while (src + 1 < src_end)
5680         {
5681           c = src[lsb];
5682           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5683             {
5684               int this_eol;
5685
5686               if (c == '\n')
5687                 this_eol = EOL_SEEN_LF;
5688               else if (src + 3 >= src_end
5689                        || src[msb + 2] != 0
5690                        || src[lsb + 2] != '\n')
5691                 this_eol = EOL_SEEN_CR;
5692               else
5693                 this_eol = EOL_SEEN_CRLF;
5694
5695               if (eol_seen == EOL_SEEN_NONE)
5696                 /* This is the first end-of-line.  */
5697                 eol_seen = this_eol;
5698               else if (eol_seen != this_eol)
5699                 {
5700                   /* The found type is different from what found before.  */
5701                   eol_seen = EOL_SEEN_LF;
5702                   break;
5703                 }
5704               if (++total == MAX_EOL_CHECK_COUNT)
5705                 break;
5706             }
5707           src += 2;
5708         }
5709     }
5710   else
5711     {
5712       while (src < src_end)
5713         {
5714           c = *src++;
5715           if (c == '\n' || c == '\r')
5716             {
5717               int this_eol;
5718
5719               if (c == '\n')
5720                 this_eol = EOL_SEEN_LF;
5721               else if (src >= src_end || *src != '\n')
5722                 this_eol = EOL_SEEN_CR;
5723               else
5724                 this_eol = EOL_SEEN_CRLF, src++;
5725
5726               if (eol_seen == EOL_SEEN_NONE)
5727                 /* This is the first end-of-line.  */
5728                 eol_seen = this_eol;
5729               else if (eol_seen != this_eol)
5730                 {
5731                   /* The found type is different from what found before.  */
5732                   eol_seen = EOL_SEEN_LF;
5733                   break;
5734                 }
5735               if (++total == MAX_EOL_CHECK_COUNT)
5736                 break;
5737             }
5738         }
5739     }
5740   return eol_seen;
5741 }
5742
5743
5744 static Lisp_Object
5745 adjust_coding_eol_type (coding, eol_seen)
5746      struct coding_system *coding;
5747      int eol_seen;
5748 {
5749   Lisp_Object eol_type;
5750
5751   eol_type = CODING_ID_EOL_TYPE (coding->id);
5752   if (eol_seen & EOL_SEEN_LF)
5753     {
5754       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5755       eol_type = Qunix;
5756     }
5757   else if (eol_seen & EOL_SEEN_CRLF)
5758     {
5759       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5760       eol_type = Qdos;
5761     }
5762   else if (eol_seen & EOL_SEEN_CR)
5763     {
5764       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5765       eol_type = Qmac;
5766     }
5767   return eol_type;
5768 }
5769
5770 /* Detect how a text specified in CODING is encoded.  If a coding
5771    system is detected, update fields of CODING by the detected coding
5772    system.  */
5773
5774 void
5775 detect_coding (coding)
5776      struct coding_system *coding;
5777 {
5778   const unsigned char *src, *src_end;
5779
5780   coding->consumed = coding->consumed_char = 0;
5781   coding->produced = coding->produced_char = 0;
5782   coding_set_source (coding);
5783
5784   src_end = coding->source + coding->src_bytes;
5785   coding->head_ascii = 0;
5786
5787   /* If we have not yet decided the text encoding type, detect it
5788      now.  */
5789   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5790     {
5791       int c, i;
5792       struct coding_detection_info detect_info;
5793       int null_byte_found = 0, eight_bit_found = 0;
5794
5795       detect_info.checked = detect_info.found = detect_info.rejected = 0;
5796       for (src = coding->source; src < src_end; src++)
5797         {
5798           c = *src;
5799           if (c & 0x80)
5800             {
5801               eight_bit_found = 1;
5802               if (null_byte_found)
5803                 break;
5804             }
5805           else if (c < 0x20)
5806             {
5807               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5808                   && ! inhibit_iso_escape_detection
5809                   && ! detect_info.checked)
5810                 {
5811                   if (detect_coding_iso_2022 (coding, &detect_info))
5812                     {
5813                       /* We have scanned the whole data.  */
5814                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5815                         {
5816                           /* We didn't find an 8-bit code.  We may
5817                              have found a null-byte, but it's very
5818                              rare that a binary file confirm to
5819                              ISO-2022.  */
5820                           src = src_end;
5821                           coding->head_ascii = src - coding->source;
5822                         }
5823                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
5824                       break;
5825                     }
5826                 }
5827               else if (! c)
5828                 {
5829                   null_byte_found = 1;
5830                   if (eight_bit_found)
5831                     break;
5832                 }
5833               if (! eight_bit_found)
5834                 coding->head_ascii++;
5835             }
5836           else if (! eight_bit_found)
5837             coding->head_ascii++;
5838         }
5839
5840       if (null_byte_found || eight_bit_found
5841           || coding->head_ascii < coding->src_bytes
5842           || detect_info.found)
5843         {
5844           enum coding_category category;
5845           struct coding_system *this;
5846
5847           if (coding->head_ascii == coding->src_bytes)
5848             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
5849             for (i = 0; i < coding_category_raw_text; i++)
5850               {
5851                 category = coding_priorities[i];
5852                 this = coding_categories + category;
5853                 if (detect_info.found & (1 << category))
5854                   break;
5855               }
5856           else
5857             {
5858               if (null_byte_found)
5859                 {
5860                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
5861                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
5862                 }
5863               for (i = 0; i < coding_category_raw_text; i++)
5864                 {
5865                   category = coding_priorities[i];
5866                   this = coding_categories + category;
5867                   if (this->id < 0)
5868                     {
5869                       /* No coding system of this category is defined.  */
5870                       detect_info.rejected |= (1 << category);
5871                     }
5872                   else if (category >= coding_category_raw_text)
5873                     continue;
5874                   else if (detect_info.checked & (1 << category))
5875                     {
5876                       if (detect_info.found & (1 << category))
5877                         break;
5878                     }
5879                   else if ((*(this->detector)) (coding, &detect_info)
5880                            && detect_info.found & (1 << category))
5881                     {
5882                       if (category == coding_category_utf_16_auto)
5883                         {
5884                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5885                             category = coding_category_utf_16_le;
5886                           else
5887                             category = coding_category_utf_16_be;
5888                         }
5889                       break;
5890                     }
5891                 }
5892             }
5893
5894           if (i < coding_category_raw_text)
5895             setup_coding_system (CODING_ID_NAME (this->id), coding);
5896           else if (null_byte_found)
5897             setup_coding_system (Qno_conversion, coding);
5898           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
5899                    == CATEGORY_MASK_ANY)
5900             setup_coding_system (Qraw_text, coding);
5901           else if (detect_info.rejected)
5902             for (i = 0; i < coding_category_raw_text; i++)
5903               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5904                 {
5905                   this = coding_categories + coding_priorities[i];
5906                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5907                   break;
5908                 }
5909         }
5910     }
5911   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5912            == coding_category_utf_8_auto)
5913     {
5914       Lisp_Object coding_systems;
5915       struct coding_detection_info detect_info;
5916
5917       coding_systems
5918         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
5919       detect_info.found = detect_info.rejected = 0;
5920       coding->head_ascii = 0;
5921       if (CONSP (coding_systems)
5922           && detect_coding_utf_8 (coding, &detect_info))
5923         {
5924           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
5925             setup_coding_system (XCAR (coding_systems), coding);
5926           else
5927             setup_coding_system (XCDR (coding_systems), coding);
5928         }
5929     }
5930   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5931            == coding_category_utf_16_auto)
5932     {
5933       Lisp_Object coding_systems;
5934       struct coding_detection_info detect_info;
5935
5936       coding_systems
5937         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
5938       detect_info.found = detect_info.rejected = 0;
5939       coding->head_ascii = 0;
5940       if (CONSP (coding_systems)
5941           && detect_coding_utf_16 (coding, &detect_info))
5942         {
5943           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5944             setup_coding_system (XCAR (coding_systems), coding);
5945           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5946             setup_coding_system (XCDR (coding_systems), coding);
5947         }
5948     }
5949 }
5950
5951
5952 static void
5953 decode_eol (coding)
5954      struct coding_system *coding;
5955 {
5956   Lisp_Object eol_type;
5957   unsigned char *p, *pbeg, *pend;
5958
5959   eol_type = CODING_ID_EOL_TYPE (coding->id);
5960   if (EQ (eol_type, Qunix))
5961     return;
5962
5963   if (NILP (coding->dst_object))
5964     pbeg = coding->destination;
5965   else
5966     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5967   pend = pbeg + coding->produced;
5968
5969   if (VECTORP (eol_type))
5970     {
5971       int eol_seen = EOL_SEEN_NONE;
5972
5973       for (p = pbeg; p < pend; p++)
5974         {
5975           if (*p == '\n')
5976             eol_seen |= EOL_SEEN_LF;
5977           else if (*p == '\r')
5978             {
5979               if (p + 1 < pend && *(p + 1) == '\n')
5980                 {
5981                   eol_seen |= EOL_SEEN_CRLF;
5982                   p++;
5983                 }
5984               else
5985                 eol_seen |= EOL_SEEN_CR;
5986             }
5987         }
5988       if (eol_seen != EOL_SEEN_NONE
5989           && eol_seen != EOL_SEEN_LF
5990           && eol_seen != EOL_SEEN_CRLF
5991           && eol_seen != EOL_SEEN_CR)
5992         eol_seen = EOL_SEEN_LF;
5993       if (eol_seen != EOL_SEEN_NONE)
5994         eol_type = adjust_coding_eol_type (coding, eol_seen);
5995     }
5996
5997   if (EQ (eol_type, Qmac))
5998     {
5999       for (p = pbeg; p < pend; p++)
6000         if (*p == '\r')
6001           *p = '\n';
6002     }
6003   else if (EQ (eol_type, Qdos))
6004     {
6005       int n = 0;
6006
6007       if (NILP (coding->dst_object))
6008         {
6009           /* Start deleting '\r' from the tail to minimize the memory
6010              movement.  */
6011           for (p = pend - 2; p >= pbeg; p--)
6012             if (*p == '\r')
6013               {
6014                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6015                 n++;
6016               }
6017         }
6018       else
6019         {
6020           int pos_byte = coding->dst_pos_byte;
6021           int pos = coding->dst_pos;
6022           int pos_end = pos + coding->produced_char - 1;
6023
6024           while (pos < pos_end)
6025             {
6026               p = BYTE_POS_ADDR (pos_byte);
6027               if (*p == '\r' && p[1] == '\n')
6028                 {
6029                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6030                   n++;
6031                   pos_end--;
6032                 }
6033               pos++;
6034               if (coding->dst_multibyte)
6035                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6036               else
6037                 pos_byte++;
6038             }
6039         }
6040       coding->produced -= n;
6041       coding->produced_char -= n;
6042     }
6043 }
6044
6045
6046 /* Return a translation table (or list of them) from coding system
6047    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6048    decoding (ENCODEP is zero). */
6049
6050 static Lisp_Object
6051 get_translation_table (attrs, encodep, max_lookup)
6052      Lisp_Object attrs;
6053      int encodep, *max_lookup;
6054 {
6055   Lisp_Object standard, translation_table;
6056   Lisp_Object val;
6057
6058   if (encodep)
6059     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6060       standard = Vstandard_translation_table_for_encode;
6061   else
6062     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6063       standard = Vstandard_translation_table_for_decode;
6064   if (NILP (translation_table))
6065     translation_table = standard;
6066   else
6067     {
6068       if (SYMBOLP (translation_table))
6069         translation_table = Fget (translation_table, Qtranslation_table);
6070       else if (CONSP (translation_table))
6071         {
6072           translation_table = Fcopy_sequence (translation_table);
6073           for (val = translation_table; CONSP (val); val = XCDR (val))
6074             if (SYMBOLP (XCAR (val)))
6075               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6076         }
6077       if (CHAR_TABLE_P (standard))
6078         {
6079           if (CONSP (translation_table))
6080             translation_table = nconc2 (translation_table,
6081                                         Fcons (standard, Qnil));
6082           else
6083             translation_table = Fcons (translation_table,
6084                                        Fcons (standard, Qnil));
6085         }
6086     }
6087
6088   if (max_lookup)
6089     {
6090       *max_lookup = 1;
6091       if (CHAR_TABLE_P (translation_table)
6092           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6093         {
6094           val = XCHAR_TABLE (translation_table)->extras[1];
6095           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6096             *max_lookup = XFASTINT (val);
6097         }
6098       else if (CONSP (translation_table))
6099         {
6100           Lisp_Object tail, val;
6101
6102           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6103             if (CHAR_TABLE_P (XCAR (tail))
6104                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6105               {
6106                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6107                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6108                   *max_lookup = XFASTINT (val);
6109               }
6110         }
6111     }
6112   return translation_table;
6113 }
6114
/* Look up character C in TABLE, which is a char-table or a list whose
   char-table elements are consulted in order (other elements are
   skipped).  C and TRANS must be lvalues.

   When a table maps C to a character, C is replaced by it and TRANS
   is set to nil; for a list, the lookup then continues in the next
   table with the new C.  When a table yields any other non-nil value,
   TRANS is set to that value (for a list, the lookup stops there).
   If TABLE is neither a char-table nor a list, TRANS is nil and C is
   unchanged.  */

#define LOOKUP_TRANSLATION_TABLE(table, c, trans)                       \
  do {                                                                  \
    (trans) = Qnil;                                                     \
    if (CONSP (table))                                                  \
      {                                                                 \
        Lisp_Object rest_;                                              \
                                                                        \
        for (rest_ = (table); CONSP (rest_); rest_ = XCDR (rest_))      \
          {                                                             \
            if (! CHAR_TABLE_P (XCAR (rest_)))                          \
              continue;                                                 \
            (trans) = CHAR_TABLE_REF (XCAR (rest_), c);                 \
            if (CHARACTERP (trans))                                     \
              {                                                         \
                (c) = XFASTINT (trans);                                 \
                (trans) = Qnil;                                         \
              }                                                         \
            else if (! NILP (trans))                                    \
              break;                                                    \
          }                                                             \
      }                                                                 \
    else if (CHAR_TABLE_P (table))                                      \
      {                                                                 \
        (trans) = CHAR_TABLE_REF (table, c);                            \
        if (CHARACTERP (trans))                                         \
          {                                                             \
            (c) = XFASTINT (trans);                                     \
            (trans) = Qnil;                                             \
          }                                                             \
      }                                                                 \
  } while (0)
6139
6140
6141 static Lisp_Object
6142 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
6143      Lisp_Object val;
6144      int *buf, *buf_end;
6145      int last_block;
6146      int *from_nchars, *to_nchars;
6147 {
6148   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
6149      [TO-CHAR ...].  */
6150   if (CONSP (val))
6151     {
6152       Lisp_Object from, tail;
6153       int i, len;
6154
6155       for (tail = val; CONSP (tail); tail = XCDR (tail))
6156         {
6157           val = XCAR (tail);
6158           from = XCAR (val);
6159           len = ASIZE (from);
6160           for (i = 0; i < len; i++)
6161             {
6162               if (buf + i == buf_end)
6163                 {
6164                   if (! last_block)
6165                     return Qt;
6166                   break;
6167                 }
6168               if (XINT (AREF (from, i)) != buf[i])
6169                 break;
6170             }
6171           if (i == len)
6172             {
6173               val = XCDR (val);
6174               *from_nchars = len;
6175               break;
6176             }
6177         }
6178       if (! CONSP (tail))
6179         return Qnil;
6180     }
6181   if (VECTORP (val))
6182     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6183   else
6184     *buf = XINT (val);
6185   return val;
6186 }
6187
6188
6189 static int
6190 produce_chars (coding, translation_table, last_block)
6191      struct coding_system *coding;
6192      Lisp_Object translation_table;
6193      int last_block;
6194 {
6195   unsigned char *dst = coding->destination + coding->produced;
6196   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6197   EMACS_INT produced;
6198   EMACS_INT produced_chars = 0;
6199   int carryover = 0;
6200
6201   if (! coding->chars_at_source)
6202     {
6203       /* Source characters are in coding->charbuf.  */
6204       int *buf = coding->charbuf;
6205       int *buf_end = buf + coding->charbuf_used;
6206
6207       if (EQ (coding->src_object, coding->dst_object))
6208         {
6209           coding_set_source (coding);
6210           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6211         }
6212
6213       while (buf < buf_end)
6214         {
6215           int c = *buf, i;
6216
6217           if (c >= 0)
6218             {
6219               int from_nchars = 1, to_nchars = 1;
6220               Lisp_Object trans = Qnil;
6221
6222               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6223               if (! NILP (trans))
6224                 {
6225                   trans = get_translation (trans, buf, buf_end, last_block,
6226                                            &from_nchars, &to_nchars);
6227                   if (EQ (trans, Qt))
6228                     break;
6229                   c = *buf;
6230                 }
6231
6232               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6233                 {
6234                   dst = alloc_destination (coding,
6235                                            buf_end - buf
6236                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6237                                            dst);
6238                   if (EQ (coding->src_object, coding->dst_object))
6239                     {
6240                       coding_set_source (coding);
6241                       dst_end = ((unsigned char *) coding->source) + coding->consumed;
6242                     }
6243                   else
6244                     dst_end = coding->destination + coding->dst_bytes;
6245                 }
6246
6247               for (i = 0; i < to_nchars; i++)
6248                 {
6249                   if (i > 0)
6250                     c = XINT (AREF (trans, i));
6251                   if (coding->dst_multibyte
6252                       || ! CHAR_BYTE8_P (c))
6253                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6254                   else
6255                     *dst++ = CHAR_TO_BYTE8 (c);
6256                 }
6257               produced_chars += to_nchars;
6258               *buf++ = to_nchars;
6259               while (--from_nchars > 0)
6260                 *buf++ = 0;
6261             }
6262           else
6263             /* This is an annotation datum.  (-C) is the length.  */
6264             buf += -c;
6265         }
6266       carryover = buf_end - buf;
6267     }
6268   else
6269     {
6270       /* Source characters are at coding->source.  */
6271       const unsigned char *src = coding->source;
6272       const unsigned char *src_end = src + coding->consumed;
6273
6274       if (EQ (coding->dst_object, coding->src_object))
6275         dst_end = (unsigned char *) src;
6276       if (coding->src_multibyte != coding->dst_multibyte)
6277         {
6278           if (coding->src_multibyte)
6279             {
6280               int multibytep = 1;
6281               EMACS_INT consumed_chars;
6282
6283               while (1)
6284                 {
6285                   const unsigned char *src_base = src;
6286                   int c;
6287
6288                   ONE_MORE_BYTE (c);
6289                   if (dst == dst_end)
6290                     {
6291                       if (EQ (coding->src_object, coding->dst_object))
6292                         dst_end = (unsigned char *) src;
6293                       if (dst == dst_end)
6294                         {
6295                           EMACS_INT offset = src - coding->source;
6296
6297                           dst = alloc_destination (coding, src_end - src + 1,
6298                                                    dst);
6299                           dst_end = coding->destination + coding->dst_bytes;
6300                           coding_set_source (coding);
6301                           src = coding->source + offset;
6302                           src_end = coding->source + coding->src_bytes;
6303                           if (EQ (coding->src_object, coding->dst_object))
6304                             dst_end = (unsigned char *) src;
6305                         }
6306                     }
6307                   *dst++ = c;
6308                   produced_chars++;
6309                 }
6310             no_more_source:
6311               ;
6312             }
6313           else
6314             while (src < src_end)
6315               {
6316                 int multibytep = 1;
6317                 int c = *src++;
6318
6319                 if (dst >= dst_end - 1)
6320                   {
6321                     if (EQ (coding->src_object, coding->dst_object))
6322                       dst_end = (unsigned char *) src;
6323                     if (dst >= dst_end - 1)
6324                       {
6325                         EMACS_INT offset = src - coding->source;
6326                         EMACS_INT more_bytes;
6327
6328                         if (EQ (coding->src_object, coding->dst_object))
6329                           more_bytes = ((src_end - src) / 2) + 2;
6330                         else
6331                           more_bytes = src_end - src + 2;
6332                         dst = alloc_destination (coding, more_bytes, dst);
6333                         dst_end = coding->destination + coding->dst_bytes;
6334                         coding_set_source (coding);
6335                         src = coding->source + offset;
6336                         src_end = coding->source + coding->src_bytes;
6337                         if (EQ (coding->src_object, coding->dst_object))
6338                           dst_end = (unsigned char *) src;
6339                       }
6340                   }
6341                 EMIT_ONE_BYTE (c);
6342               }
6343         }
6344       else
6345         {
6346           if (!EQ (coding->src_object, coding->dst_object))
6347             {
6348               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6349
6350               if (require > 0)
6351                 {
6352                   EMACS_INT offset = src - coding->source;
6353
6354                   dst = alloc_destination (coding, require, dst);
6355                   coding_set_source (coding);
6356                   src = coding->source + offset;
6357                   src_end = coding->source + coding->src_bytes;
6358                 }
6359             }
6360           produced_chars = coding->consumed_char;
6361           while (src < src_end)
6362             *dst++ = *src++;
6363         }
6364     }
6365
6366   produced = dst - (coding->destination + coding->produced);
6367   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6368     insert_from_gap (produced_chars, produced);
6369   coding->produced += produced;
6370   coding->produced_char += produced_chars;
6371   return carryover;
6372 }
6373
6374 /* Compose text in CODING->object according to the annotation data at
6375    CHARBUF.  CHARBUF is an array:
6376      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
6377  */
6378
6379 static INLINE void
6380 produce_composition (coding, charbuf, pos)
6381      struct coding_system *coding;
6382      int *charbuf;
6383      EMACS_INT pos;
6384 {
6385   int len;
6386   EMACS_INT to;
6387   enum composition_method method;
6388   Lisp_Object components;
6389
6390   len = -charbuf[0];
6391   to = pos + charbuf[2];
6392   if (to <= pos)
6393     return;
6394   method = (enum composition_method) (charbuf[3]);
6395
6396   if (method == COMPOSITION_RELATIVE)
6397     components = Qnil;
6398   else if (method >= COMPOSITION_WITH_RULE
6399            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6400     {
6401       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6402       int i;
6403
6404       len -= 4;
6405       charbuf += 4;
6406       for (i = 0; i < len; i++)
6407         {
6408           args[i] = make_number (charbuf[i]);
6409           if (charbuf[i] < 0)
6410             return;
6411         }
6412       components = (method == COMPOSITION_WITH_ALTCHARS
6413                     ? Fstring (len, args) : Fvector (len, args));
6414     }
6415   else
6416     return;
6417   compose_text (pos, to, components, Qnil, coding->dst_object);
6418 }
6419
6420
6421 /* Put `charset' property on text in CODING->object according to
6422    the annotation data at CHARBUF.  CHARBUF is an array:
6423      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6424  */
6425
6426 static INLINE void
6427 produce_charset (coding, charbuf, pos)
6428      struct coding_system *coding;
6429      int *charbuf;
6430      EMACS_INT pos;
6431 {
6432   EMACS_INT from = pos - charbuf[2];
6433   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6434
6435   Fput_text_property (make_number (from), make_number (pos),
6436                       Qcharset, CHARSET_NAME (charset),
6437                       coding->dst_object);
6438 }
6439
6440
/* Number of `int' elements first requested for CODING->charbuf.  */
#define CHARBUF_SIZE 0x4000

/* Allocate the conversion work area CODING->charbuf with alloca and
   record its element count in CODING->charbuf_size.  The request
   starts at CHARBUF_SIZE elements and is halved after each failed
   attempt while the size stays above 1024.

   This must remain a macro: the memory comes from alloca, and on
   failure the macro records CODING_RESULT_INSUFFICIENT_MEM and
   executes `return CODING->result' in the function that uses it.  */
#define ALLOC_CONVERSION_WORK_AREA(coding)                              \
  do {                                                                  \
    int size = CHARBUF_SIZE;                                            \
                                                                        \
    (coding)->charbuf = NULL;                                           \
    while (size > 1024)                                                 \
      {                                                                 \
        (coding)->charbuf = (int *) alloca (sizeof (int) * size);       \
        if ((coding)->charbuf)                                          \
          break;                                                        \
        size >>= 1;                                                     \
      }                                                                 \
    if (! (coding)->charbuf)                                            \
      {                                                                 \
        record_conversion_result ((coding),                             \
                                  CODING_RESULT_INSUFFICIENT_MEM);      \
        return (coding)->result;                                        \
      }                                                                 \
    (coding)->charbuf_size = size;                                      \
  } while (0)
6462
6463
6464 static void
6465 produce_annotation (coding, pos)
6466      struct coding_system *coding;
6467      EMACS_INT pos;
6468 {
6469   int *charbuf = coding->charbuf;
6470   int *charbuf_end = charbuf + coding->charbuf_used;
6471
6472   if (NILP (coding->dst_object))
6473     return;
6474
6475   while (charbuf < charbuf_end)
6476     {
6477       if (*charbuf >= 0)
6478         pos += *charbuf++;
6479       else
6480         {
6481           int len = -*charbuf;
6482           switch (charbuf[1])
6483             {
6484             case CODING_ANNOTATE_COMPOSITION_MASK:
6485               produce_composition (coding, charbuf, pos);
6486               break;
6487             case CODING_ANNOTATE_CHARSET_MASK:
6488               produce_charset (coding, charbuf, pos);
6489               break;
6490             default:
6491               abort ();
6492             }
6493           charbuf += len;
6494         }
6495     }
6496 }
6497
/* Decode the data at CODING->src_object into CODING->dst_object.
   CODING->src_object is a buffer, a string, or nil.
   CODING->dst_object is a buffer.

   If CODING->src_object is a buffer, it must be the current buffer.
   In this case, if CODING->src_pos is positive, it is a position of
   the source text in the buffer, otherwise, the source text is in the
   gap area of the buffer, and CODING->src_pos specifies the offset of
   the text from GPT (which must be the same as PT).  If this is the
   same buffer as CODING->dst_object, CODING->src_pos must be
   negative.

   If CODING->src_object is a string, CODING->src_pos is an index to
   that string.

   If CODING->src_object is nil, CODING->source must already point to
   the non-relocatable memory area.  In this case, CODING->src_pos is
   an offset from CODING->source.

   The decoded data is inserted at the current point of the buffer
   CODING->dst_object.

   The return value is CODING->result.
*/

static int
decode_coding (coding)
     struct coding_system *coding;
{
  Lisp_Object attrs;
  Lisp_Object undo_list;
  Lisp_Object translation_table;
  int carryover;
  int i;

  /* If the source text lies in a buffer and GPT falls strictly inside
     it, move the gap to the start of the source text.  */
  if (BUFFERP (coding->src_object)
      && coding->src_pos > 0
      && coding->src_pos < GPT
      && coding->src_pos + coding->src_chars > GPT)
    move_gap_both (coding->src_pos, coding->src_pos_byte);

  /* When decoding into a buffer, make it current, put the gap at
     point, and set its undo list to Qt for the duration of the
     conversion; the saved list is put back just before the single
     record_insert call at the end.  */
  undo_list = Qt;
  if (BUFFERP (coding->dst_object))
    {
      if (current_buffer != XBUFFER (coding->dst_object))
        set_buffer_internal (XBUFFER (coding->dst_object));
      if (GPT != PT)
        move_gap_both (PT, PT_BYTE);
      undo_list = current_buffer->undo_list;
      current_buffer->undo_list = Qt;
    }

  coding->consumed = coding->consumed_char = 0;
  coding->produced = coding->produced_char = 0;
  coding->chars_at_source = 0;
  record_conversion_result (coding, CODING_RESULT_SUCCESS);
  coding->errors = 0;

  /* NOTE(review): on allocation failure this macro returns from this
     function directly, which would skip the restoration of
     current_buffer->undo_list below -- confirm whether that matters.  */
  ALLOC_CONVERSION_WORK_AREA (coding);

  attrs = CODING_ID_ATTRS (coding->id);
  translation_table = get_translation_table (attrs, 0, NULL);

  /* Main loop: run the decoder to fill coding->charbuf, then let
     produce_chars write the characters to the destination.
     produce_chars returns the number of trailing charbuf elements it
     did not process; they are moved to the front of charbuf so that
     the next round starts with them.  */
  carryover = 0;
  do
    {
      EMACS_INT pos = coding->dst_pos + coding->produced_char;

      coding_set_source (coding);
      coding->annotated = 0;
      coding->charbuf_used = carryover;
      (*(coding->decoder)) (coding);
      coding_set_destination (coding);
      carryover = produce_chars (coding, translation_table, 0);
      if (coding->annotated)
        produce_annotation (coding, pos);
      for (i = 0; i < carryover; i++)
        coding->charbuf[i]
          = coding->charbuf[coding->charbuf_used - carryover + i];
    }
  while (coding->consumed < coding->src_bytes
         && (coding->result == CODING_RESULT_SUCCESS
             || coding->result == CODING_RESULT_INVALID_SRC));

  /* Flush the elements still carried over, this time passing 1 as the
     last-block argument of produce_chars.  */
  if (carryover > 0)
    {
      coding_set_destination (coding);
      coding->charbuf_used = carryover;
      produce_chars (coding, translation_table, 1);
    }

  /* Deal with source bytes the decoder left unconsumed.  */
  coding->carryover_bytes = 0;
  if (coding->consumed < coding->src_bytes)
    {
      int nbytes = coding->src_bytes - coding->consumed;
      const unsigned char *src;

      coding_set_source (coding);
      coding_set_destination (coding);
      src = coding->source + coding->consumed;

      if (coding->mode & CODING_MODE_LAST_BLOCK)
        {
          /* Flush out unprocessed data as binary chars.  We are sure
             that the number of data is less than the size of
             coding->charbuf.  */
          coding->charbuf_used = 0;
          while (nbytes-- > 0)
            {
              int c = *src++;

              /* Bytes with the high bit set become BYTE8 characters.  */
              if (c & 0x80)
                c = BYTE8_TO_CHAR (c);
              coding->charbuf[coding->charbuf_used++] = c;
            }
          /* No translation table is applied to these raw bytes.  */
          produce_chars (coding, Qnil, 1);
        }
      else
        {
          /* Record unprocessed bytes in coding->carryover.  We are
             sure that the number of data is less than the size of
             coding->carryover.  */
          unsigned char *p = coding->carryover;

          coding->carryover_bytes = nbytes;
          while (nbytes-- > 0)
            *p++ = *src++;
        }
      /* Either way, all source bytes now count as consumed.  */
      coding->consumed = coding->src_bytes;
    }

  /* End-of-line decoding is needed unless the eol type is `unix'.  */
  if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
    decode_eol (coding);
  if (BUFFERP (coding->dst_object))
    {
      current_buffer->undo_list = undo_list;
      record_insert (coding->dst_pos, coding->produced_char);
    }
  return coding->result;
}
6636
6637
6638 /* Extract an annotation datum from a composition starting at POS and
6639    ending before LIMIT of CODING->src_object (buffer or string), store
6640    the data in BUF, set *STOP to a starting position of the next
6641    composition (if any) or to LIMIT, and return the address of the
6642    next element of BUF.
6643
6644    If such an annotation is not found, set *STOP to a starting
6645    position of a composition after POS (if any) or to LIMIT, and
6646    return BUF.  */
6647
6648 static INLINE int *
6649 handle_composition_annotation (pos, limit, coding, buf, stop)
6650      EMACS_INT pos, limit;
6651      struct coding_system *coding;
6652      int *buf;
6653      EMACS_INT *stop;
6654 {
6655   EMACS_INT start, end;
6656   Lisp_Object prop;
6657
6658   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6659       || end > limit)
6660     *stop = limit;
6661   else if (start > pos)
6662     *stop = start;
6663   else
6664     {
6665       if (start == pos)
6666         {
6667           /* We found a composition.  Store the corresponding
6668              annotation data in BUF.  */
6669           int *head = buf;
6670           enum composition_method method = COMPOSITION_METHOD (prop);
6671           int nchars = COMPOSITION_LENGTH (prop);
6672
6673           ADD_COMPOSITION_DATA (buf, nchars, method);
6674           if (method != COMPOSITION_RELATIVE)
6675             {
6676               Lisp_Object components;
6677               int len, i, i_byte;
6678
6679               components = COMPOSITION_COMPONENTS (prop);
6680               if (VECTORP (components))
6681                 {
6682                   len = XVECTOR (components)->size;
6683                   for (i = 0; i < len; i++)
6684                     *buf++ = XINT (AREF (components, i));
6685                 }
6686               else if (STRINGP (components))
6687                 {
6688                   len = SCHARS (components);
6689                   i = i_byte = 0;
6690                   while (i < len)
6691                     {
6692                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6693                       buf++;
6694                     }
6695                 }
6696               else if (INTEGERP (components))
6697                 {
6698                   len = 1;
6699                   *buf++ = XINT (components);
6700                 }
6701               else if (CONSP (components))
6702                 {
6703                   for (len = 0; CONSP (components);
6704                        len++, components = XCDR (components))
6705                     *buf++ = XINT (XCAR (components));
6706                 }
6707               else
6708                 abort ();
6709               *head -= len;
6710             }
6711         }
6712
6713       if (find_composition (end, limit, &start, &end, &prop,
6714                             coding->src_object)
6715           && end <= limit)
6716         *stop = start;
6717       else
6718         *stop = limit;
6719     }
6720   return buf;
6721 }
6722
6723
6724 /* Extract an annotation datum from a text property `charset' at POS of
6725    CODING->src_object (buffer of string), store the data in BUF, set
6726    *STOP to the position where the value of `charset' property changes
6727    (limiting by LIMIT), and return the address of the next element of
6728    BUF.
6729
6730    If the property value is nil, set *STOP to the position where the
6731    property value is non-nil (limiting by LIMIT), and return BUF.  */
6732
6733 static INLINE int *
6734 handle_charset_annotation (pos, limit, coding, buf, stop)
6735      EMACS_INT pos, limit;
6736      struct coding_system *coding;
6737      int *buf;
6738      EMACS_INT *stop;
6739 {
6740   Lisp_Object val, next;
6741   int id;
6742
6743   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6744   if (! NILP (val) && CHARSETP (val))
6745     id = XINT (CHARSET_SYMBOL_ID (val));
6746   else
6747     id = -1;
6748   ADD_CHARSET_DATA (buf, 0, id);
6749   next = Fnext_single_property_change (make_number (pos), Qcharset,
6750                                        coding->src_object,
6751                                        make_number (limit));
6752   *stop = XINT (next);
6753   return buf;
6754 }
6755
6756
/* Read characters of the source text, starting CODING->consumed bytes
   into CODING->source, and store them (together with annotation data
   and the results of TRANSLATION_TABLE lookups) in CODING->charbuf.
   End-of-line conversion for a non-`unix' eol type is applied on the
   way.  MAX_LOOKUP is the number of characters to hand to
   get_translation for one lookup.

   On return, CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used describe how far the function got, and
   CODING->chars_at_source is 0.  */

static void
consume_chars (coding, translation_table, max_lookup)
     struct coding_system *coding;
     Lisp_Object translation_table;
     int max_lookup;
{
  int *buf = coding->charbuf;
  int *buf_end = coding->charbuf + coding->charbuf_size;
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  EMACS_INT pos = coding->src_pos + coding->consumed_char;
  EMACS_INT end_pos = coding->src_pos + coding->src_chars;
  int multibytep = coding->src_multibyte;
  Lisp_Object eol_type;
  int c;
  EMACS_INT stop, stop_composition, stop_charset;
  int *lookup_buf = NULL;

  /* The lookup buffer is only needed when there is a translation
     table.  NOTE(review): lookup_buf[0] is written unconditionally
     below, so MAX_LOOKUP is presumably at least 1 whenever
     TRANSLATION_TABLE is non-nil -- confirm against
     get_translation_table.  */
  if (! NILP (translation_table))
    lookup_buf = alloca (sizeof (int) * max_lookup);

  /* An eol type that is a vector is handled like `unix' here, i.e. no
     end-of-line conversion is done.  */
  eol_type = CODING_ID_EOL_TYPE (coding->id);
  if (VECTORP (eol_type))
    eol_type = Qunix;

  /* Note: composition handling is not yet implemented.  */
  coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;

  /* STOP is the next position at which annotations have to be
     examined; with no source object there is nothing to examine
     before END_POS.  */
  if (NILP (coding->src_object))
    stop = stop_composition = stop_charset = end_pos;
  else
    {
      if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
        stop = stop_composition = pos;
      else
        stop = stop_composition = end_pos;
      if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
        stop = stop_charset = pos;
      else
        stop_charset = end_pos;
    }

  /* Compensate for CRLF and conversion.  */
  buf_end -= 1 + MAX_ANNOTATION_LENGTH;
  while (buf < buf_end)
    {
      Lisp_Object trans;

      if (pos == stop)
        {
          /* All source characters have been read.  */
          if (pos == end_pos)
            break;
          if (pos == stop_composition)
            buf = handle_composition_annotation (pos, end_pos, coding,
                                                 buf, &stop_composition);
          if (pos == stop_charset)
            buf = handle_charset_annotation (pos, end_pos, coding,
                                             buf, &stop_charset);
          /* Next stop is the nearer of the two.  */
          stop = (stop_composition < stop_charset
                  ? stop_composition : stop_charset);
        }

      /* Fetch the next character C and advance SRC and POS.  */
      if (! multibytep)
        {
          EMACS_INT bytes;

          if (coding->encoder == encode_coding_raw_text)
            c = *src++, pos++;
          else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
            c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
          else
            c = BYTE8_TO_CHAR (*src), src++, pos++;
        }
      else
        c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
      /* In selective-display mode a CR is treated as a newline.  */
      if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
        c = '\n';
      /* End-of-line conversion: for `dos' a CR is stored in front of
         the newline, for any other non-`unix' type the newline itself
         is replaced by CR.  */
      if (! EQ (eol_type, Qunix))
        {
          if (c == '\n')
            {
              if (EQ (eol_type, Qdos))
                *buf++ = '\r';
              else
                c = '\r';
            }
        }

      trans = Qnil;
      LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
      if (NILP (trans))
        *buf++ = c;
      else
        {
          int from_nchars = 1, to_nchars = 1;
          int *lookup_buf_end;
          const unsigned char *p = src;
          int i;

          /* Collect C and up to MAX_LOOKUP - 1 following characters
             (without consuming them) for get_translation.  */
          lookup_buf[0] = c;
          for (i = 1; i < max_lookup && p < src_end; i++)
            lookup_buf[i] = STRING_CHAR_ADVANCE (p);
          lookup_buf_end = lookup_buf + i;
          trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
                                   &from_nchars, &to_nchars);
          /* NOTE(review): SRC and POS were already advanced past C
             above, so when this break is taken C appears to be
             counted as consumed without having been stored in BUF --
             confirm whether this case can occur.  */
          if (EQ (trans, Qt)
              || buf + to_nchars > buf_end)
            break;
          /* The first result is taken from lookup_buf[0], the
             remaining ones from the vector TRANS.  */
          *buf++ = *lookup_buf;
          for (i = 1; i < to_nchars; i++)
            *buf++ = XINT (AREF (trans, i));
          /* Skip the additional source characters covered by the
             translation.  */
          for (i = 1; i < from_nchars; i++, pos++)
            src += MULTIBYTE_LENGTH_NO_CHECK (src);
        }
    }

  coding->consumed = src - coding->source;
  coding->consumed_char = pos - coding->src_pos;
  coding->charbuf_used = buf - coding->charbuf;
  coding->chars_at_source = 0;
}
6878
6879
6880 /* Encode the text at CODING->src_object into CODING->dst_object.
6881    CODING->src_object is a buffer or a string.
6882    CODING->dst_object is a buffer or nil.
6883
6884    If CODING->src_object is a buffer, it must be the current buffer.
6885    In this case, if CODING->src_pos is positive, it is a position of
6886    the source text in the buffer, otherwise. the source text is in the
6887    gap area of the buffer, and coding->src_pos specifies the offset of
6888    the text from GPT (which must be the same as PT).  If this is the
6889    same buffer as CODING->dst_object, CODING->src_pos must be
6890    negative and CODING should not have `pre-write-conversion'.
6891
6892    If CODING->src_object is a string, CODING should not have
6893    `pre-write-conversion'.
6894
6895    If CODING->dst_object is a buffer, the encoded data is inserted at
6896    the current point of that buffer.
6897
6898    If CODING->dst_object is nil, the encoded data is placed at the
6899    memory area specified by CODING->destination.  */
6900
6901 static int
6902 encode_coding (coding)
6903      struct coding_system *coding;
6904 {
6905   Lisp_Object attrs;
6906   Lisp_Object translation_table;
6907   int max_lookup;
6908
6909   attrs = CODING_ID_ATTRS (coding->id);
6910   if (coding->encoder == encode_coding_raw_text)
6911     translation_table = Qnil, max_lookup = 0;
6912   else
6913     translation_table = get_translation_table (attrs, 1, &max_lookup);
6914
6915   if (BUFFERP (coding->dst_object))
6916     {
6917       set_buffer_internal (XBUFFER (coding->dst_object));
6918       coding->dst_multibyte
6919         = ! NILP (current_buffer->enable_multibyte_characters);
6920     }
6921
6922   coding->consumed = coding->consumed_char = 0;
6923   coding->produced = coding->produced_char = 0;
6924   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6925   coding->errors = 0;
6926
6927   ALLOC_CONVERSION_WORK_AREA (coding);
6928
6929   do {
6930     coding_set_source (coding);
6931     consume_chars (coding, translation_table, max_lookup);
6932     coding_set_destination (coding);
6933     (*(coding->encoder)) (coding);
6934   } while (coding->consumed_char < coding->src_chars);
6935
6936   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
6937     insert_from_gap (coding->produced_char, coding->produced);
6938
6939   return (coding->result);
6940 }
6941
6942
/* Name (or base name) of work buffer for code conversion.  */
static Lisp_Object Vcode_conversion_workbuf_name;

/* A working buffer used by the top level conversion.  Once it is
   created, it is never destroyed.  It has the name
   Vcode_conversion_workbuf_name.  The other working buffers are
   destroyed after the use is finished, and their names are modified
   versions of Vcode_conversion_workbuf_name.  */
static Lisp_Object Vcode_conversion_reused_workbuf;

/* Nonzero iff Vcode_conversion_reused_workbuf is already in use.
   make_conversion_work_buffer makes it nonzero, and
   code_conversion_restore resets it to 0 when the reused buffer is
   released.  */
static int reused_workbuf_in_use;
6955
6956
6957 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6958    multibyteness of returning buffer.  */
6959
6960 static Lisp_Object
6961 make_conversion_work_buffer (multibyte)
6962      int multibyte;
6963 {
6964   Lisp_Object name, workbuf;
6965   struct buffer *current;
6966
6967   if (reused_workbuf_in_use++)
6968     {
6969       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6970       workbuf = Fget_buffer_create (name);
6971     }
6972   else
6973     {
6974       name = Vcode_conversion_workbuf_name;
6975       workbuf = Fget_buffer_create (name);
6976       if (NILP (Vcode_conversion_reused_workbuf))
6977         Vcode_conversion_reused_workbuf = workbuf;
6978     }
6979   current = current_buffer;
6980   set_buffer_internal (XBUFFER (workbuf));
6981   Ferase_buffer ();
6982   current_buffer->undo_list = Qt;
6983   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6984   set_buffer_internal (current);
6985   return workbuf;
6986 }
6987
6988
6989 static Lisp_Object
6990 code_conversion_restore (arg)
6991      Lisp_Object arg;
6992 {
6993   Lisp_Object current, workbuf;
6994   struct gcpro gcpro1;
6995
6996   GCPRO1 (arg);
6997   current = XCAR (arg);
6998   workbuf = XCDR (arg);
6999   if (! NILP (workbuf))
7000     {
7001       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7002         reused_workbuf_in_use = 0;
7003       else if (! NILP (Fbuffer_live_p (workbuf)))
7004         Fkill_buffer (workbuf);
7005     }
7006   set_buffer_internal (XBUFFER (current));
7007   UNGCPRO;
7008   return Qnil;
7009 }
7010
7011 Lisp_Object
7012 code_conversion_save (with_work_buf, multibyte)
7013      int with_work_buf, multibyte;
7014 {
7015   Lisp_Object workbuf = Qnil;
7016
7017   if (with_work_buf)
7018     workbuf = make_conversion_work_buffer (multibyte);
7019   record_unwind_protect (code_conversion_restore,
7020                          Fcons (Fcurrent_buffer (), workbuf));
7021   return workbuf;
7022 }
7023
7024 int
7025 decode_coding_gap (coding, chars, bytes)
7026      struct coding_system *coding;
7027      EMACS_INT chars, bytes;
7028 {
7029   int count = specpdl_ptr - specpdl;
7030   Lisp_Object attrs;
7031
7032   code_conversion_save (0, 0);
7033
7034   coding->src_object = Fcurrent_buffer ();
7035   coding->src_chars = chars;
7036   coding->src_bytes = bytes;
7037   coding->src_pos = -chars;
7038   coding->src_pos_byte = -bytes;
7039   coding->src_multibyte = chars < bytes;
7040   coding->dst_object = coding->src_object;
7041   coding->dst_pos = PT;
7042   coding->dst_pos_byte = PT_BYTE;
7043   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7044
7045   if (CODING_REQUIRE_DETECTION (coding))
7046     detect_coding (coding);
7047
7048   coding->mode |= CODING_MODE_LAST_BLOCK;
7049   current_buffer->text->inhibit_shrinking = 1;
7050   decode_coding (coding);
7051   current_buffer->text->inhibit_shrinking = 0;
7052
7053   attrs = CODING_ID_ATTRS (coding->id);
7054   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7055     {
7056       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7057       Lisp_Object val;
7058
7059       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7060       val = call1 (CODING_ATTR_POST_READ (attrs),
7061                    make_number (coding->produced_char));
7062       CHECK_NATNUM (val);
7063       coding->produced_char += Z - prev_Z;
7064       coding->produced += Z_BYTE - prev_Z_BYTE;
7065     }
7066
7067   unbind_to (count, Qnil);
7068   return coding->result;
7069 }
7070
7071 int
7072 encode_coding_gap (coding, chars, bytes)
7073      struct coding_system *coding;
7074      EMACS_INT chars, bytes;
7075 {
7076   int count = specpdl_ptr - specpdl;
7077
7078   code_conversion_save (0, 0);
7079
7080   coding->src_object = Fcurrent_buffer ();
7081   coding->src_chars = chars;
7082   coding->src_bytes = bytes;
7083   coding->src_pos = -chars;
7084   coding->src_pos_byte = -bytes;
7085   coding->src_multibyte = chars < bytes;
7086   coding->dst_object = coding->src_object;
7087   coding->dst_pos = PT;
7088   coding->dst_pos_byte = PT_BYTE;
7089
7090   encode_coding (coding);
7091
7092   unbind_to (count, Qnil);
7093   return coding->result;
7094 }
7095
7096
7097 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7098    SRC_OBJECT into DST_OBJECT by coding context CODING.
7099
7100    SRC_OBJECT is a buffer, a string, or Qnil.
7101
7102    If it is a buffer, the text is at point of the buffer.  FROM and TO
7103    are positions in the buffer.
7104
7105    If it is a string, the text is at the beginning of the string.
7106    FROM and TO are indices to the string.
7107
7108    If it is nil, the text is at coding->source.  FROM and TO are
7109    indices to coding->source.
7110
7111    DST_OBJECT is a buffer, Qt, or Qnil.
7112
7113    If it is a buffer, the decoded text is inserted at point of the
7114    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7115    is deleted.
7116
7117    If it is Qt, a string is made from the decoded text, and
7118    set in CODING->dst_object.
7119
7120    If it is Qnil, the decoded text is stored at CODING->destination.
7121    The caller must allocate CODING->dst_bytes bytes at
7122    CODING->destination by xmalloc.  If the decoded text is longer than
7123    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7124  */
7125
7126 void
7127 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7128                       dst_object)
7129      struct coding_system *coding;
7130      Lisp_Object src_object;
7131      EMACS_INT from, from_byte, to, to_byte;
7132      Lisp_Object dst_object;
7133 {
7134   int count = specpdl_ptr - specpdl;
       /* DESTINATION and DST_BYTES are set, and later read, only when
          DST_OBJECT is nil.  */
7135   unsigned char *destination;
7136   EMACS_INT dst_bytes;
7137   EMACS_INT chars = to - from;
7138   EMACS_INT bytes = to_byte - from_byte;
7139   Lisp_Object attrs;
       /* SAVED_PT stays negative unless the source and the destination
          are the same buffer; it then holds the original point.  */
7140   int saved_pt = -1, saved_pt_byte;
7141   int need_marker_adjustment = 0;
7142   Lisp_Object old_deactivate_mark;
7143
       /* Remember Vdeactivate_mark so that it can be put back just
          before returning.  */
7144   old_deactivate_mark = Vdeactivate_mark;
7145
7146   if (NILP (dst_object))
7147     {
7148       destination = coding->destination;
7149       dst_bytes = coding->dst_bytes;
7150     }
7151
7152   coding->src_object = src_object;
7153   coding->src_chars = chars;
7154   coding->src_bytes = bytes;
       /* More bytes than characters: the source is in multibyte form.  */
7155   coding->src_multibyte = chars < bytes;
7156
7157   if (STRINGP (src_object))
7158     {
7159       coding->src_pos = from;
7160       coding->src_pos_byte = from_byte;
7161     }
7162   else if (BUFFERP (src_object))
7163     {
7164       set_buffer_internal (XBUFFER (src_object));
7165       if (from != GPT)
7166         move_gap_both (from, from_byte);
7167       if (EQ (src_object, dst_object))
7168         {
7169           struct Lisp_Marker *tail;
7170
               /* Flag every marker that sits exactly on the edge of the
                  region (FROM for insertion-type markers, TO for the
                  others); the flagged markers are repositioned once the
                  decoded text is in place (see the end of this
                  function).  */
7171           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7172             {
7173               tail->need_adjustment
7174                 = tail->charpos == (tail->insertion_type ? from : to);
7175               need_marker_adjustment |= tail->need_adjustment;
7176             }
7177           saved_pt = PT, saved_pt_byte = PT_BYTE;
7178           TEMP_SET_PT_BOTH (from, from_byte);
               /* inhibit_shrinking is cleared again when point is
                  restored below.  Presumably it keeps the buffer text
                  from being shrunk while the deleted source bytes are
                  still being read -- TODO confirm.  */
7179           current_buffer->text->inhibit_shrinking = 1;
7180           del_range_both (from, from_byte, to, to_byte, 1);
               /* The source is now addressed by negative offsets.
                  NOTE(review): presumably relative to the gap, where the
                  deleted text still lies -- confirm in decode_coding.  */
7181           coding->src_pos = -chars;
7182           coding->src_pos_byte = -bytes;
7183         }
7184       else
7185         {
7186           coding->src_pos = from;
7187           coding->src_pos_byte = from_byte;
7188         }
7189     }
7190
       /* ATTRS is fetched only after the optional detection step
          (presumably because detection may change CODING->id).  */
7191   if (CODING_REQUIRE_DETECTION (coding))
7192     detect_coding (coding);
7193   attrs = CODING_ID_ATTRS (coding->id);
7194
       /* Select the destination.  When a string is requested, or when a
          post-read function must run on output bound for
          CODING->destination, decode into the object returned by
          code_conversion_save (1, ...), which is treated as a buffer
          below.  */
7195   if (EQ (dst_object, Qt)
7196       || (! NILP (CODING_ATTR_POST_READ (attrs))
7197           && NILP (dst_object)))
7198     {
7199       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7200       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7201       coding->dst_pos = BEG;
7202       coding->dst_pos_byte = BEG_BYTE;
7203     }
7204   else if (BUFFERP (dst_object))
7205     {
7206       code_conversion_save (0, 0);
7207       coding->dst_object = dst_object;
7208       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7209       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7210       coding->dst_multibyte
7211         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7212     }
7213   else
7214     {
7215       code_conversion_save (0, 0);
7216       coding->dst_object = Qnil;
7217       /* Most callers presume this will return a multibyte result, and they
7218          won't use `binary' or `raw-text' anyway, so let's not worry about
7219          CODING_FOR_UNIBYTE.  */
7220       coding->dst_multibyte = 1;
7221     }
7222
7223   decode_coding (coding);
7224
7225   if (BUFFERP (coding->dst_object))
7226     set_buffer_internal (XBUFFER (coding->dst_object));
7227
       /* Run the coding system's post-read function, if any, with point
          at the start of the decoded text and the number of decoded
          characters as its argument.  Whatever it inserts or deletes is
          accounted for through the change of Z / Z_BYTE.  The returned
          value is only checked to be a natural number.  */
7228   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7229     {
7230       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7231       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7232       Lisp_Object val;
7233
7234       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7235       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7236               old_deactivate_mark);
7237       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7238                         make_number (coding->produced_char));
7239       UNGCPRO;
7240       CHECK_NATNUM (val);
7241       coding->produced_char += Z - prev_Z;
7242       coding->produced += Z_BYTE - prev_Z_BYTE;
7243     }
7244
7245   if (EQ (dst_object, Qt))
7246     {
           /* The caller wants a string: take the text of the buffer made
              current above.  */
7247       coding->dst_object = Fbuffer_string ();
7248     }
7249   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7250     {
           /* The text was decoded into a buffer only because of the
              post-read function; hand it back via CODING->destination.
              NOTE(review): the copy below is made only when the
              caller's area is too small (DST_BYTES < produced) -- verify
              that this is what callers expect.  */
7251       set_buffer_internal (XBUFFER (coding->dst_object));
7252       if (dst_bytes < coding->produced)
7253         {
7254           destination = xrealloc (destination, coding->produced);
7255           if (! destination)
7256             {
                   /* NOTE(review): this early return skips the
                      restoration of Vdeactivate_mark done at the end of
                      the function.  */
7257               record_conversion_result (coding,
7258                                         CODING_RESULT_INSUFFICIENT_DST);
7259               unbind_to (count, Qnil);
7260               return;
7261             }
               /* Move the gap out of the produced text so that it can be
                  copied as one contiguous run starting at BEGV_ADDR.  */
7262           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7263             move_gap_both (BEGV, BEGV_BYTE);
7264           bcopy (BEGV_ADDR, destination, coding->produced);
7265           coding->destination = destination;
7266         }
7267     }
7268
7269   if (saved_pt >= 0)
7270     {
7271       /* This is the case of:
7272          (BUFFERP (src_object) && EQ (src_object, dst_object))
7273          As we have moved PT while replacing the original buffer
7274          contents, we must recover it now.  */
7275       set_buffer_internal (XBUFFER (src_object));
7276       current_buffer->text->inhibit_shrinking = 0;
           /* Point before the region is unchanged; point inside the
              region goes to FROM; point after the region is shifted by
              the change in size (in bytes only for a unibyte buffer).  */
7277       if (saved_pt < from)
7278         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7279       else if (saved_pt < from + chars)
7280         TEMP_SET_PT_BOTH (from, from_byte);
7281       else if (! NILP (current_buffer->enable_multibyte_characters))
7282         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7283                           saved_pt_byte + (coding->produced - bytes));
7284       else
7285         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7286                           saved_pt_byte + (coding->produced - bytes));
7287
7288       if (need_marker_adjustment)
7289         {
7290           struct Lisp_Marker *tail;
7291
               /* Reposition the markers flagged before the deletion:
                  insertion-type markers go to the start of the decoded
                  text, the others to its end.  */
7292           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7293             if (tail->need_adjustment)
7294               {
7295                 tail->need_adjustment = 0;
7296                 if (tail->insertion_type)
7297                   {
7298                     tail->bytepos = from_byte;
7299                     tail->charpos = from;
7300                   }
7301                 else
7302                   {
7303                     tail->bytepos = from_byte + coding->produced;
7304                     tail->charpos
7305                       = (NILP (current_buffer->enable_multibyte_characters)
7306                          ? tail->bytepos : from + coding->produced_char);
7307                   }
7308               }
7309         }
7310     }
7311
7312   Vdeactivate_mark = old_deactivate_mark;
       /* CODING->dst_object is handed to unbind_to as its value argument
          (presumably so that it stays protected while the saved state is
          unwound -- TODO confirm).  */
7313   unbind_to (count, coding->dst_object);
7314 }
7315
7316
7317 void
7318 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7319                       dst_object)
7320      struct coding_system *coding;
7321      Lisp_Object src_object;
7322      EMACS_INT from, from_byte, to, to_byte;
7323      Lisp_Object dst_object;
7324 {
7325   int count = specpdl_ptr - specpdl;
7326   EMACS_INT chars = to - from;
7327   EMACS_INT bytes = to_byte - from_byte;
7328   Lisp_Object attrs;
7329   int saved_pt = -1, saved_pt_byte;
7330   int need_marker_adjustment = 0;
7331   int kill_src_buffer = 0;
7332   Lisp_Object old_deactivate_mark;
7333
7334   old_deactivate_mark = Vdeactivate_mark;
7335
7336   coding->src_object = src_object;
7337   coding->src_chars = chars;
7338   coding->src_bytes = bytes;
7339   coding->src_multibyte = chars < bytes;
7340
7341   attrs = CODING_ID_ATTRS (coding->id);
7342
7343   if (EQ (src_object, dst_object))
7344     {
7345       struct Lisp_Marker *tail;
7346
7347       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7348         {
7349           tail->need_adjustment
7350             = tail->charpos == (tail->insertion_type ? from : to);
7351           need_marker_adjustment |= tail->need_adjustment;
7352         }
7353     }
7354
7355   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7356     {
7357       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7358       set_buffer_internal (XBUFFER (coding->src_object));
7359       if (STRINGP (src_object))
7360         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7361       else if (BUFFERP (src_object))
7362         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7363       else
7364         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7365
7366       if (EQ (src_object, dst_object))
7367         {
7368           set_buffer_internal (XBUFFER (src_object));
7369           saved_pt = PT, saved_pt_byte = PT_BYTE;
7370           del_range_both (from, from_byte, to, to_byte, 1);
7371           set_buffer_internal (XBUFFER (coding->src_object));
7372         }
7373
7374       {
7375         Lisp_Object args[3];
7376         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7377
7378         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7379                 old_deactivate_mark);
7380         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7381         args[1] = make_number (BEG);
7382         args[2] = make_number (Z);
7383         safe_call (3, args);
7384         UNGCPRO;
7385       }
7386       if (XBUFFER (coding->src_object) != current_buffer)
7387         kill_src_buffer = 1;
7388       coding->src_object = Fcurrent_buffer ();
7389       if (BEG != GPT)
7390         move_gap_both (BEG, BEG_BYTE);
7391       coding->src_chars = Z - BEG;
7392       coding->src_bytes = Z_BYTE - BEG_BYTE;
7393       coding->src_pos = BEG;
7394       coding->src_pos_byte = BEG_BYTE;
7395       coding->src_multibyte = Z < Z_BYTE;
7396     }
7397   else if (STRINGP (src_object))
7398     {
7399       code_conversion_save (0, 0);
7400       coding->src_pos = from;
7401       coding->src_pos_byte = from_byte;
7402     }
7403   else if (BUFFERP (src_object))
7404     {
7405       code_conversion_save (0, 0);
7406       set_buffer_internal (XBUFFER (src_object));
7407       if (EQ (src_object, dst_object))
7408         {
7409           saved_pt = PT, saved_pt_byte = PT_BYTE;
7410           coding->src_object = del_range_1 (from, to, 1, 1);
7411           coding->src_pos = 0;
7412           coding->src_pos_byte = 0;
7413         }
7414       else
7415         {
7416           if (from < GPT && to >= GPT)
7417             move_gap_both (from, from_byte);
7418           coding->src_pos = from;
7419           coding->src_pos_byte = from_byte;
7420         }
7421     }
7422   else
7423     code_conversion_save (0, 0);
7424
7425   if (BUFFERP (dst_object))
7426     {
7427       coding->dst_object = dst_object;
7428       if (EQ (src_object, dst_object))
7429         {
7430           coding->dst_pos = from;
7431           coding->dst_pos_byte = from_byte;
7432         }
7433       else
7434         {
7435           struct buffer *current = current_buffer;
7436
7437           set_buffer_temp (XBUFFER (dst_object));
7438           coding->dst_pos = PT;
7439           coding->dst_pos_byte = PT_BYTE;
7440           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7441           set_buffer_temp (current);
7442         }
7443       coding->dst_multibyte
7444         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7445     }
7446   else if (EQ (dst_object, Qt))
7447     {
7448       coding->dst_object = Qnil;
7449       coding->dst_bytes = coding->src_chars;
7450       if (coding->dst_bytes == 0)
7451         coding->dst_bytes = 1;
7452       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7453       coding->dst_multibyte = 0;
7454     }
7455   else
7456     {
7457       coding->dst_object = Qnil;
7458       coding->dst_multibyte = 0;
7459     }
7460
7461   encode_coding (coding);
7462
7463   if (EQ (dst_object, Qt))
7464     {
7465       if (BUFFERP (coding->dst_object))
7466         coding->dst_object = Fbuffer_string ();
7467       else
7468         {
7469           coding->dst_object
7470             = make_unibyte_string ((char *) coding->destination,
7471                                    coding->produced);
7472           xfree (coding->destination);
7473         }
7474     }
7475
7476   if (saved_pt >= 0)
7477     {
7478       /* This is the case of:
7479          (BUFFERP (src_object) && EQ (src_object, dst_object))
7480          As we have moved PT while replacing the original buffer
7481          contents, we must recover it now.  */
7482       set_buffer_internal (XBUFFER (src_object));
7483       if (saved_pt < from)
7484         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7485       else if (saved_pt < from + chars)
7486         TEMP_SET_PT_BOTH (from, from_byte);
7487       else if (! NILP (current_buffer->enable_multibyte_characters))
7488         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7489                           saved_pt_byte + (coding->produced - bytes));
7490       else
7491         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7492                           saved_pt_byte + (coding->produced - bytes));
7493
7494       if (need_marker_adjustment)
7495         {
7496           struct Lisp_Marker *tail;
7497
7498           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7499             if (tail->need_adjustment)
7500               {
7501                 tail->need_adjustment = 0;
7502                 if (tail->insertion_type)
7503                   {
7504                     tail->bytepos = from_byte;
7505                     tail->charpos = from;
7506                   }
7507                 else
7508                   {
7509                     tail->bytepos = from_byte + coding->produced;
7510                     tail->charpos
7511                       = (NILP (current_buffer->enable_multibyte_characters)
7512                          ? tail->bytepos : from + coding->produced_char);
7513                   }
7514               }
7515         }
7516     }
7517
7518   if (kill_src_buffer)
7519     Fkill_buffer (coding->src_object);
7520
7521   Vdeactivate_mark = old_deactivate_mark;
7522   unbind_to (count, Qnil);
7523 }
7524
7525
7526 Lisp_Object
7527 preferred_coding_system ()
7528 {
7529   int id = coding_categories[coding_priorities[0]].id;
7530
7531   return CODING_ID_NAME (id);
7532 }
7533
7534 \f
7535 #ifdef emacs
7536 /*** 11. Emacs Lisp library functions ***/
7537
7538 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
7539        doc: /* Return t if OBJECT is nil or a coding-system.
7540 See the documentation of `define-coding-system' for information
7541 about coding-system objects.  */)
7542      (object)
7543      Lisp_Object object;
7544 {
7545   if (NILP (object)
7546       || CODING_SYSTEM_ID (object) >= 0)
7547     return Qt;
7548   if (! SYMBOLP (object)
7549       || NILP (Fget (object, Qcoding_system_define_form)))
7550     return Qnil;
7551   return Qt;
7552 }
7553
7554 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7555        Sread_non_nil_coding_system, 1, 1, 0,
7556        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7557      (prompt)
7558      Lisp_Object prompt;
7559 {
7560   Lisp_Object val;
7561   do
7562     {
7563       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7564                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
7565     }
7566   while (SCHARS (val) == 0);
7567   return (Fintern (val, Qnil));
7568 }
7569
7570 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7571        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7572 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7573 Ignores case when completing coding systems (all Emacs coding systems
7574 are lower-case).  */)
7575      (prompt, default_coding_system)
7576      Lisp_Object prompt, default_coding_system;
7577 {
7578   Lisp_Object val;
7579   int count = SPECPDL_INDEX ();
7580
7581   if (SYMBOLP (default_coding_system))
7582     default_coding_system = SYMBOL_NAME (default_coding_system);
7583   specbind (Qcompletion_ignore_case, Qt);
7584   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7585                           Qt, Qnil, Qcoding_system_history,
7586                           default_coding_system, Qnil);
7587   unbind_to (count, Qnil);
7588   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
7589 }
7590
7591 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7592        1, 1, 0,
7593        doc: /* Check validity of CODING-SYSTEM.
7594 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7595 It is valid if it is nil or a symbol defined as a coding system by the
7596 function `define-coding-system'.  */)
7597   (coding_system)
7598      Lisp_Object coding_system;
7599 {
7600   Lisp_Object define_form;
7601
7602   define_form = Fget (coding_system, Qcoding_system_define_form);
7603   if (! NILP (define_form))
7604     {
7605       Fput (coding_system, Qcoding_system_define_form, Qnil);
7606       safe_eval (define_form);
7607     }
7608   if (!NILP (Fcoding_system_p (coding_system)))
7609     return coding_system;
7610   xsignal1 (Qcoding_system_error, coding_system);
7611 }
7612
7613 \f
7614 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7615    HIGHEST is nonzero, return the coding system of the highest
7616    priority among the detected coding systems.  Otherwize return a
7617    list of detected coding systems sorted by their priorities.  If
7618    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7619    multibyte form but contains only ASCII and eight-bit chars.
7620    Otherwise, the bytes are raw bytes.
7621
7622    CODING-SYSTEM controls the detection as below:
7623
7624    If it is nil, detect both text-format and eol-format.  If the
7625    text-format part of CODING-SYSTEM is already specified
7626    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7627    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7628    detect only text-format.  */
7629
7630 Lisp_Object
7631 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7632                       coding_system)
7633      const unsigned char *src;
7634      EMACS_INT src_chars, src_bytes;
7635      int highest;
7636      int multibytep;
7637      Lisp_Object coding_system;
7638 {
7639   const unsigned char *src_end = src + src_bytes;
7640   Lisp_Object attrs, eol_type;
7641   Lisp_Object val;
7642   struct coding_system coding;
7643   int id;
7644   struct coding_detection_info detect_info;
7645   enum coding_category base_category;
7646   int null_byte_found = 0, eight_bit_found = 0;
7647
7648   if (NILP (coding_system))
7649     coding_system = Qundecided;
7650   setup_coding_system (coding_system, &coding);
7651   attrs = CODING_ID_ATTRS (coding.id);
7652   eol_type = CODING_ID_EOL_TYPE (coding.id);
7653   coding_system = CODING_ATTR_BASE_NAME (attrs);
7654
7655   coding.source = src;
7656   coding.src_chars = src_chars;
7657   coding.src_bytes = src_bytes;
7658   coding.src_multibyte = multibytep;
7659   coding.consumed = 0;
7660   coding.mode |= CODING_MODE_LAST_BLOCK;
7661   coding.head_ascii = 0;
7662
7663   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7664
7665   /* At first, detect text-format if necessary.  */
7666   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7667   if (base_category == coding_category_undecided)
7668     {
7669       enum coding_category category;
7670       struct coding_system *this;
7671       int c, i;
7672
7673       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7674       for (; src < src_end; src++)
7675         {
7676           c = *src;
7677           if (c & 0x80)
7678             {
7679               eight_bit_found = 1;
7680               if (null_byte_found)
7681                 break;
7682             }
7683           else if (c < 0x20)
7684             {
7685               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7686                   && ! inhibit_iso_escape_detection
7687                   && ! detect_info.checked)
7688                 {
7689                   if (detect_coding_iso_2022 (&coding, &detect_info))
7690                     {
7691                       /* We have scanned the whole data.  */
7692                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7693                         {
7694                           /* We didn't find an 8-bit code.  We may
7695                              have found a null-byte, but it's very
7696                              rare that a binary file confirm to
7697                              ISO-2022.  */
7698                           src = src_end;
7699                           coding.head_ascii = src - coding.source;
7700                         }
7701                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
7702                       break;
7703                     }
7704                 }
7705               else if (! c)
7706                 {
7707                   null_byte_found = 1;
7708                   if (eight_bit_found)
7709                     break;
7710                 }
7711               if (! eight_bit_found)
7712                 coding.head_ascii++;
7713             }
7714           else if (! eight_bit_found)
7715             coding.head_ascii++;
7716         }
7717
7718       if (null_byte_found || eight_bit_found
7719           || coding.head_ascii < coding.src_bytes
7720           || detect_info.found)
7721         {
7722           if (coding.head_ascii == coding.src_bytes)
7723             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
7724             for (i = 0; i < coding_category_raw_text; i++)
7725               {
7726                 category = coding_priorities[i];
7727                 this = coding_categories + category;
7728                 if (detect_info.found & (1 << category))
7729                   break;
7730               }
7731           else
7732             {
7733               if (null_byte_found)
7734                 {
7735                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
7736                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
7737                 }
7738               for (i = 0; i < coding_category_raw_text; i++)
7739                 {
7740                   category = coding_priorities[i];
7741                   this = coding_categories + category;
7742
7743                   if (this->id < 0)
7744                     {
7745                       /* No coding system of this category is defined.  */
7746                       detect_info.rejected |= (1 << category);
7747                     }
7748                   else if (category >= coding_category_raw_text)
7749                     continue;
7750                   else if (detect_info.checked & (1 << category))
7751                     {
7752                       if (highest
7753                           && (detect_info.found & (1 << category)))
7754                         break;
7755                     }
7756                   else if ((*(this->detector)) (&coding, &detect_info)
7757                            && highest
7758                            && (detect_info.found & (1 << category)))
7759                     {
7760                       if (category == coding_category_utf_16_auto)
7761                         {
7762                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7763                             category = coding_category_utf_16_le;
7764                           else
7765                             category = coding_category_utf_16_be;
7766                         }
7767                       break;
7768                     }
7769                 }
7770             }
7771         }
7772
7773       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
7774         {
7775           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7776           id = coding_categories[coding_category_raw_text].id;
7777           val = Fcons (make_number (id), Qnil);
7778         }
7779       else if (! detect_info.rejected && ! detect_info.found)
7780         {
7781           detect_info.found = CATEGORY_MASK_ANY;
7782           id = coding_categories[coding_category_undecided].id;
7783           val = Fcons (make_number (id), Qnil);
7784         }
7785       else if (highest)
7786         {
7787           if (detect_info.found)
7788             {
7789               detect_info.found = 1 << category;
7790               val = Fcons (make_number (this->id), Qnil);
7791             }
7792           else
7793             for (i = 0; i < coding_category_raw_text; i++)
7794               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7795                 {
7796                   detect_info.found = 1 << coding_priorities[i];
7797                   id = coding_categories[coding_priorities[i]].id;
7798                   val = Fcons (make_number (id), Qnil);
7799                   break;
7800                 }
7801         }
7802       else
7803         {
7804           int mask = detect_info.rejected | detect_info.found;
7805           int found = 0;
7806           val = Qnil;
7807
7808           for (i = coding_category_raw_text - 1; i >= 0; i--)
7809             {
7810               category = coding_priorities[i];
7811               if (! (mask & (1 << category)))
7812                 {
7813                   found |= 1 << category;
7814                   id = coding_categories[category].id;
7815                   if (id >= 0)
7816                     val = Fcons (make_number (id), val);
7817                 }
7818             }
7819           for (i = coding_category_raw_text - 1; i >= 0; i--)
7820             {
7821               category = coding_priorities[i];
7822               if (detect_info.found & (1 << category))
7823                 {
7824                   id = coding_categories[category].id;
7825                   val = Fcons (make_number (id), val);
7826                 }
7827             }
7828           detect_info.found |= found;
7829         }
7830     }
7831   else if (base_category == coding_category_utf_8_auto)
7832     {
7833       if (detect_coding_utf_8 (&coding, &detect_info))
7834         {
7835           struct coding_system *this;
7836
7837           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
7838             this = coding_categories + coding_category_utf_8_sig;
7839           else
7840             this = coding_categories + coding_category_utf_8_nosig;
7841           val = Fcons (make_number (this->id), Qnil);
7842         }
7843     }
7844   else if (base_category == coding_category_utf_16_auto)
7845     {
7846       if (detect_coding_utf_16 (&coding, &detect_info))
7847         {
7848           struct coding_system *this;
7849
7850           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7851             this = coding_categories + coding_category_utf_16_le;
7852           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7853             this = coding_categories + coding_category_utf_16_be;
7854           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7855             this = coding_categories + coding_category_utf_16_be_nosig;
7856           else
7857             this = coding_categories + coding_category_utf_16_le_nosig;
7858           val = Fcons (make_number (this->id), Qnil);
7859         }
7860     }
7861   else
7862     {
7863       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7864       val = Fcons (make_number (coding.id), Qnil);
7865     }
7866
7867   /* Then, detect eol-format if necessary.  */
7868   {
7869     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7870     Lisp_Object tail;
7871
7872     if (VECTORP (eol_type))
7873       {
7874         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7875           {
7876             if (null_byte_found)
7877               normal_eol = EOL_SEEN_LF;
7878             else
7879               normal_eol = detect_eol (coding.source, src_bytes,
7880                                        coding_category_raw_text);
7881           }
7882         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7883                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7884           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7885                                       coding_category_utf_16_be);
7886         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7887                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7888           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7889                                       coding_category_utf_16_le);
7890       }
7891     else
7892       {
7893         if (EQ (eol_type, Qunix))
7894           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7895         else if (EQ (eol_type, Qdos))
7896           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7897         else
7898           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7899       }
7900
7901     for (tail = val; CONSP (tail); tail = XCDR (tail))
7902       {
7903         enum coding_category category;
7904         int this_eol;
7905
7906         id = XINT (XCAR (tail));
7907         attrs = CODING_ID_ATTRS (id);
7908         category = XINT (CODING_ATTR_CATEGORY (attrs));
7909         eol_type = CODING_ID_EOL_TYPE (id);
7910         if (VECTORP (eol_type))
7911           {
7912             if (category == coding_category_utf_16_be
7913                 || category == coding_category_utf_16_be_nosig)
7914               this_eol = utf_16_be_eol;
7915             else if (category == coding_category_utf_16_le
7916                      || category == coding_category_utf_16_le_nosig)
7917               this_eol = utf_16_le_eol;
7918             else
7919               this_eol = normal_eol;
7920
7921             if (this_eol == EOL_SEEN_LF)
7922               XSETCAR (tail, AREF (eol_type, 0));
7923             else if (this_eol == EOL_SEEN_CRLF)
7924               XSETCAR (tail, AREF (eol_type, 1));
7925             else if (this_eol == EOL_SEEN_CR)
7926               XSETCAR (tail, AREF (eol_type, 2));
7927             else
7928               XSETCAR (tail, CODING_ID_NAME (id));
7929           }
7930         else
7931           XSETCAR (tail, CODING_ID_NAME (id));
7932       }
7933   }
7934
7935   return (highest ? XCAR (val) : val);
7936 }
7937
7938
7939 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7940        2, 3, 0,
7941        doc: /* Detect coding system of the text in the region between START and END.
7942 Return a list of possible coding systems ordered by priority.
7943
7944 If only ASCII characters are found (except for such ISO-2022 control
7945 characters as ESC), it returns a list of single element `undecided'
7946 or its subsidiary coding system according to a detected end-of-line
7947 format.
7948
7949 If optional argument HIGHEST is non-nil, return the coding system of
7950 highest priority.  */)
7951      (start, end, highest)
7952      Lisp_Object start, end, highest;
7953 {
7954   int from, to;
7955   int from_byte, to_byte;
7956
7957   CHECK_NUMBER_COERCE_MARKER (start);
7958   CHECK_NUMBER_COERCE_MARKER (end);
7959
7960   validate_region (&start, &end);
7961   from = XINT (start), to = XINT (end);
7962   from_byte = CHAR_TO_BYTE (from);
7963   to_byte = CHAR_TO_BYTE (to);
7964
7965   if (from < GPT && to >= GPT)
7966     move_gap_both (to, to_byte);
7967
7968   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7969                                to - from, to_byte - from_byte,
7970                                !NILP (highest),
7971                                !NILP (current_buffer
7972                                       ->enable_multibyte_characters),
7973                                Qnil);
7974 }
7975
7976 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7977        1, 2, 0,
7978        doc: /* Detect coding system of the text in STRING.
7979 Return a list of possible coding systems ordered by priority.
7980
7981 If only ASCII characters are found (except for such ISO-2022 control
7982 characters as ESC), it returns a list of single element `undecided'
7983 or its subsidiary coding system according to a detected end-of-line
7984 format.
7985
7986 If optional argument HIGHEST is non-nil, return the coding system of
7987 highest priority.  */)
7988      (string, highest)
7989      Lisp_Object string, highest;
7990 {
7991   CHECK_STRING (string);
7992
7993   return detect_coding_system (SDATA (string),
7994                                SCHARS (string), SBYTES (string),
7995                                !NILP (highest), STRING_MULTIBYTE (string),
7996                                Qnil);
7997 }
7998
7999
8000 static INLINE int
8001 char_encodable_p (c, attrs)
8002      int c;
8003      Lisp_Object attrs;
8004 {
8005   Lisp_Object tail;
8006   struct charset *charset;
8007   Lisp_Object translation_table;
8008
8009   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8010   if (! NILP (translation_table))
8011     c = translate_char (translation_table, c);
8012   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8013        CONSP (tail); tail = XCDR (tail))
8014     {
8015       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8016       if (CHAR_CHARSET_P (c, charset))
8017         break;
8018     }
8019   return (! NILP (tail));
8020 }
8021
8022
8023 /* Return a list of coding systems that safely encode the text between
8024    START and END.  If EXCLUDE is non-nil, it is a list of coding
8025    systems not to check.  The returned list doesn't contain any such
8026    coding systems.  In any case, if the text contains only ASCII or is
8027    unibyte, return t.  */
8028
8029 DEFUN ("find-coding-systems-region-internal",
8030        Ffind_coding_systems_region_internal,
8031        Sfind_coding_systems_region_internal, 2, 3, 0,
8032        doc: /* Internal use only.  */)
8033      (start, end, exclude)
8034      Lisp_Object start, end, exclude;
8035 {
8036   Lisp_Object coding_attrs_list, safe_codings;
8037   EMACS_INT start_byte, end_byte;
8038   const unsigned char *p, *pbeg, *pend;
8039   int c;
8040   Lisp_Object tail, elt;
8041
8042   if (STRINGP (start))
8043     {
8044       if (!STRING_MULTIBYTE (start)
8045           || SCHARS (start) == SBYTES (start))
8046         return Qt;
8047       start_byte = 0;
8048       end_byte = SBYTES (start);
8049     }
8050   else
8051     {
8052       CHECK_NUMBER_COERCE_MARKER (start);
8053       CHECK_NUMBER_COERCE_MARKER (end);
8054       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8055         args_out_of_range (start, end);
8056       if (NILP (current_buffer->enable_multibyte_characters))
8057         return Qt;
8058       start_byte = CHAR_TO_BYTE (XINT (start));
8059       end_byte = CHAR_TO_BYTE (XINT (end));
8060       if (XINT (end) - XINT (start) == end_byte - start_byte)
8061         return Qt;
8062
8063       if (XINT (start) < GPT && XINT (end) > GPT)
8064         {
8065           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8066             move_gap_both (XINT (start), start_byte);
8067           else
8068             move_gap_both (XINT (end), end_byte);
8069         }
8070     }
8071
8072   coding_attrs_list = Qnil;
8073   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8074     if (NILP (exclude)
8075         || NILP (Fmemq (XCAR (tail), exclude)))
8076       {
8077         Lisp_Object attrs;
8078
8079         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8080         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8081             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8082           {
8083             ASET (attrs, coding_attr_trans_tbl,
8084                   get_translation_table (attrs, 1, NULL));
8085             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8086           }
8087       }
8088
8089   if (STRINGP (start))
8090     p = pbeg = SDATA (start);
8091   else
8092     p = pbeg = BYTE_POS_ADDR (start_byte);
8093   pend = p + (end_byte - start_byte);
8094
8095   while (p < pend && ASCII_BYTE_P (*p)) p++;
8096   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8097
8098   while (p < pend)
8099     {
8100       if (ASCII_BYTE_P (*p))
8101         p++;
8102       else
8103         {
8104           c = STRING_CHAR_ADVANCE (p);
8105
8106           charset_map_loaded = 0;
8107           for (tail = coding_attrs_list; CONSP (tail);)
8108             {
8109               elt = XCAR (tail);
8110               if (NILP (elt))
8111                 tail = XCDR (tail);
8112               else if (char_encodable_p (c, elt))
8113                 tail = XCDR (tail);
8114               else if (CONSP (XCDR (tail)))
8115                 {
8116                   XSETCAR (tail, XCAR (XCDR (tail)));
8117                   XSETCDR (tail, XCDR (XCDR (tail)));
8118                 }
8119               else
8120                 {
8121                   XSETCAR (tail, Qnil);
8122                   tail = XCDR (tail);
8123                 }
8124             }
8125           if (charset_map_loaded)
8126             {
8127               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8128
8129               if (STRINGP (start))
8130                 pbeg = SDATA (start);
8131               else
8132                 pbeg = BYTE_POS_ADDR (start_byte);
8133               p = pbeg + p_offset;
8134               pend = pbeg + pend_offset;
8135             }
8136         }
8137     }
8138
8139   safe_codings = list2 (Qraw_text, Qno_conversion);
8140   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8141     if (! NILP (XCAR (tail)))
8142       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8143
8144   return safe_codings;
8145 }
8146
8147
8148 DEFUN ("unencodable-char-position", Funencodable_char_position,
8149        Sunencodable_char_position, 3, 5, 0,
8150        doc: /*
8151 Return position of first un-encodable character in a region.
8152 START and END specify the region and CODING-SYSTEM specifies the
8153 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8154
8155 If optional 4th argument COUNT is non-nil, it specifies at most how
8156 many un-encodable characters to search.  In this case, the value is a
8157 list of positions.
8158
8159 If optional 5th argument STRING is non-nil, it is a string to search
8160 for un-encodable characters.  In that case, START and END are indexes
8161 to the string.  */)
8162      (start, end, coding_system, count, string)
8163      Lisp_Object start, end, coding_system, count, string;
8164 {
8165   int n;
8166   struct coding_system coding;
8167   Lisp_Object attrs, charset_list, translation_table;
8168   Lisp_Object positions;
8169   int from, to;
8170   const unsigned char *p, *stop, *pend;
8171   int ascii_compatible;
8172
8173   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8174   attrs = CODING_ID_ATTRS (coding.id);
8175   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8176     return Qnil;
8177   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8178   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8179   translation_table = get_translation_table (attrs, 1, NULL);
8180
8181   if (NILP (string))
8182     {
8183       validate_region (&start, &end);
8184       from = XINT (start);
8185       to = XINT (end);
8186       if (NILP (current_buffer->enable_multibyte_characters)
8187           || (ascii_compatible
8188               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8189         return Qnil;
8190       p = CHAR_POS_ADDR (from);
8191       pend = CHAR_POS_ADDR (to);
8192       if (from < GPT && to >= GPT)
8193         stop = GPT_ADDR;
8194       else
8195         stop = pend;
8196     }
8197   else
8198     {
8199       CHECK_STRING (string);
8200       CHECK_NATNUM (start);
8201       CHECK_NATNUM (end);
8202       from = XINT (start);
8203       to = XINT (end);
8204       if (from > to
8205           || to > SCHARS (string))
8206         args_out_of_range_3 (string, start, end);
8207       if (! STRING_MULTIBYTE (string))
8208         return Qnil;
8209       p = SDATA (string) + string_char_to_byte (string, from);
8210       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8211       if (ascii_compatible && (to - from) == (pend - p))
8212         return Qnil;
8213     }
8214
8215   if (NILP (count))
8216     n = 1;
8217   else
8218     {
8219       CHECK_NATNUM (count);
8220       n = XINT (count);
8221     }
8222
8223   positions = Qnil;
8224   while (1)
8225     {
8226       int c;
8227
8228       if (ascii_compatible)
8229         while (p < stop && ASCII_BYTE_P (*p))
8230           p++, from++;
8231       if (p >= stop)
8232         {
8233           if (p >= pend)
8234             break;
8235           stop = pend;
8236           p = GAP_END_ADDR;
8237         }
8238
8239       c = STRING_CHAR_ADVANCE (p);
8240       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8241           && ! char_charset (translate_char (translation_table, c),
8242                              charset_list, NULL))
8243         {
8244           positions = Fcons (make_number (from), positions);
8245           n--;
8246           if (n == 0)
8247             break;
8248         }
8249
8250       from++;
8251     }
8252
8253   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8254 }
8255
8256
8257 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8258        Scheck_coding_systems_region, 3, 3, 0,
8259        doc: /* Check if the region is encodable by coding systems.
8260
8261 START and END are buffer positions specifying the region.
8262 CODING-SYSTEM-LIST is a list of coding systems to check.
8263
8264 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8265 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8266 whole region, POS0, POS1, ... are buffer positions where non-encodable
8267 characters are found.
8268
8269 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8270 value is nil.
8271
8272 START may be a string.  In that case, check if the string is
8273 encodable, and the value contains indices to the string instead of
8274 buffer positions.  END is ignored.  */)
8275      (start, end, coding_system_list)
8276      Lisp_Object start, end, coding_system_list;
8277 {
8278   Lisp_Object list;
8279   EMACS_INT start_byte, end_byte;
8280   int pos;
8281   const unsigned char *p, *pbeg, *pend;
8282   int c;
8283   Lisp_Object tail, elt, attrs;
8284
8285   if (STRINGP (start))
8286     {
8287       if (!STRING_MULTIBYTE (start)
8288           && SCHARS (start) != SBYTES (start))
8289         return Qnil;
8290       start_byte = 0;
8291       end_byte = SBYTES (start);
8292       pos = 0;
8293     }
8294   else
8295     {
8296       CHECK_NUMBER_COERCE_MARKER (start);
8297       CHECK_NUMBER_COERCE_MARKER (end);
8298       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8299         args_out_of_range (start, end);
8300       if (NILP (current_buffer->enable_multibyte_characters))
8301         return Qnil;
8302       start_byte = CHAR_TO_BYTE (XINT (start));
8303       end_byte = CHAR_TO_BYTE (XINT (end));
8304       if (XINT (end) - XINT (start) == end_byte - start_byte)
8305         return Qt;
8306
8307       if (XINT (start) < GPT && XINT (end) > GPT)
8308         {
8309           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8310             move_gap_both (XINT (start), start_byte);
8311           else
8312             move_gap_both (XINT (end), end_byte);
8313         }
8314       pos = XINT (start);
8315     }
8316
8317   list = Qnil;
8318   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8319     {
8320       elt = XCAR (tail);
8321       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8322       ASET (attrs, coding_attr_trans_tbl,
8323             get_translation_table (attrs, 1, NULL));
8324       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8325     }
8326
8327   if (STRINGP (start))
8328     p = pbeg = SDATA (start);
8329   else
8330     p = pbeg = BYTE_POS_ADDR (start_byte);
8331   pend = p + (end_byte - start_byte);
8332
8333   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8334   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8335
8336   while (p < pend)
8337     {
8338       if (ASCII_BYTE_P (*p))
8339         p++;
8340       else
8341         {
8342           c = STRING_CHAR_ADVANCE (p);
8343
8344           charset_map_loaded = 0;
8345           for (tail = list; CONSP (tail); tail = XCDR (tail))
8346             {
8347               elt = XCDR (XCAR (tail));
8348               if (! char_encodable_p (c, XCAR (elt)))
8349                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8350             }
8351           if (charset_map_loaded)
8352             {
8353               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8354
8355               if (STRINGP (start))
8356                 pbeg = SDATA (start);
8357               else
8358                 pbeg = BYTE_POS_ADDR (start_byte);
8359               p = pbeg + p_offset;
8360               pend = pbeg + pend_offset;
8361             }
8362         }
8363       pos++;
8364     }
8365
8366   tail = list;
8367   list = Qnil;
8368   for (; CONSP (tail); tail = XCDR (tail))
8369     {
8370       elt = XCAR (tail);
8371       if (CONSP (XCDR (XCDR (elt))))
8372         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8373                       list);
8374     }
8375
8376   return list;
8377 }
8378
8379
8380 Lisp_Object
8381 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8382      Lisp_Object start, end, coding_system, dst_object;
8383      int encodep, norecord;
8384 {
8385   struct coding_system coding;
8386   EMACS_INT from, from_byte, to, to_byte;
8387   Lisp_Object src_object;
8388
8389   CHECK_NUMBER_COERCE_MARKER (start);
8390   CHECK_NUMBER_COERCE_MARKER (end);
8391   if (NILP (coding_system))
8392     coding_system = Qno_conversion;
8393   else
8394     CHECK_CODING_SYSTEM (coding_system);
8395   src_object = Fcurrent_buffer ();
8396   if (NILP (dst_object))
8397     dst_object = src_object;
8398   else if (! EQ (dst_object, Qt))
8399     CHECK_BUFFER (dst_object);
8400
8401   validate_region (&start, &end);
8402   from = XFASTINT (start);
8403   from_byte = CHAR_TO_BYTE (from);
8404   to = XFASTINT (end);
8405   to_byte = CHAR_TO_BYTE (to);
8406
8407   setup_coding_system (coding_system, &coding);
8408   coding.mode |= CODING_MODE_LAST_BLOCK;
8409
8410   if (encodep)
8411     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8412                           dst_object);
8413   else
8414     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8415                           dst_object);
8416   if (! norecord)
8417     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8418
8419   return (BUFFERP (dst_object)
8420           ? make_number (coding.produced_char)
8421           : coding.dst_object);
8422 }
8423
8424
8425 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8426        3, 4, "r\nzCoding system: ",
8427        doc: /* Decode the current region from the specified coding system.
8428 When called from a program, takes four arguments:
8429         START, END, CODING-SYSTEM, and DESTINATION.
8430 START and END are buffer positions.
8431
8432 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8433 If nil, the region between START and END is replaced by the decoded text.
8434 If buffer, the decoded text is inserted in the buffer.
8435 In those cases, the length of the decoded text is returned.
8436 If DESTINATION is t, the decoded text is returned.
8437
8438 This function sets `last-coding-system-used' to the precise coding system
8439 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8440 not fully specified.)  */)
8441      (start, end, coding_system, destination)
8442      Lisp_Object start, end, coding_system, destination;
8443 {
8444   return code_convert_region (start, end, coding_system, destination, 0, 0);
8445 }
8446
8447 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8448        3, 4, "r\nzCoding system: ",
8449        doc: /* Encode the current region by specified coding system.
8450 When called from a program, takes four arguments:
8451         START, END, CODING-SYSTEM and DESTINATION.
8452 START and END are buffer positions.
8453
8454 Optional 4th arguments DESTINATION specifies where the encoded text goes.
8455 If nil, the region between START and END is replace by the encoded text.
8456 If buffer, the encoded text is inserted in the buffer.
8457 In those cases, the length of the encoded text is returned.
8458 If DESTINATION is t, the encoded text is returned.
8459
8460 This function sets `last-coding-system-used' to the precise coding system
8461 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8462 not fully specified.)  */)
8463   (start, end, coding_system, destination)
8464      Lisp_Object start, end, coding_system, destination;
8465 {
8466   return code_convert_region (start, end, coding_system, destination, 1, 0);
8467 }
8468
8469 Lisp_Object
8470 code_convert_string (string, coding_system, dst_object,
8471                      encodep, nocopy, norecord)
8472      Lisp_Object string, coding_system, dst_object;
8473      int encodep, nocopy, norecord;
8474 {
8475   struct coding_system coding;
8476   EMACS_INT chars, bytes;
8477
8478   CHECK_STRING (string);
8479   if (NILP (coding_system))
8480     {
8481       if (! norecord)
8482         Vlast_coding_system_used = Qno_conversion;
8483       if (NILP (dst_object))
8484         return (nocopy ? Fcopy_sequence (string) : string);
8485     }
8486
8487   if (NILP (coding_system))
8488     coding_system = Qno_conversion;
8489   else
8490     CHECK_CODING_SYSTEM (coding_system);
8491   if (NILP (dst_object))
8492     dst_object = Qt;
8493   else if (! EQ (dst_object, Qt))
8494     CHECK_BUFFER (dst_object);
8495
8496   setup_coding_system (coding_system, &coding);
8497   coding.mode |= CODING_MODE_LAST_BLOCK;
8498   chars = SCHARS (string);
8499   bytes = SBYTES (string);
8500   if (encodep)
8501     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8502   else
8503     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8504   if (! norecord)
8505     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8506
8507   return (BUFFERP (dst_object)
8508           ? make_number (coding.produced_char)
8509           : coding.dst_object);
8510 }
8511
8512
8513 /* Encode or decode STRING according to CODING_SYSTEM.
8514    Do not set Vlast_coding_system_used.
8515
8516    This function is called only from macros DECODE_FILE and
8517    ENCODE_FILE, thus we ignore character composition.  */
8518
8519 Lisp_Object
8520 code_convert_string_norecord (string, coding_system, encodep)
8521      Lisp_Object string, coding_system;
8522      int encodep;
8523 {
8524   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
8525 }
8526
8527
8528 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8529        2, 4, 0,
8530        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8531
8532 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8533 if the decoding operation is trivial.
8534
8535 Optional fourth arg BUFFER non-nil means that the decoded text is
8536 inserted in BUFFER instead of returned as a string.  In this case,
8537 the return value is the length of the decoded text.
8538
8539 This function sets `last-coding-system-used' to the precise coding system
8540 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8541 not fully specified.)  */)
8542   (string, coding_system, nocopy, buffer)
8543      Lisp_Object string, coding_system, nocopy, buffer;
8544 {
8545   return code_convert_string (string, coding_system, buffer,
8546                               0, ! NILP (nocopy), 0);
8547 }
8548
8549 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8550        2, 4, 0,
8551        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8552
8553 Optional third arg NOCOPY non-nil means it is OK to return STRING
8554 itself if the encoding operation is trivial.
8555
8556 Optional fourth arg BUFFER non-nil means that the encoded text is
8557 inserted in BUFFER instead of returned as a string.  In this case,
8558 the return value is the length of the encoded text.
8559
8560 This function sets `last-coding-system-used' to the precise coding system
8561 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8562 not fully specified.)  */)
8563      (string, coding_system, nocopy, buffer)
8564      Lisp_Object string, coding_system, nocopy, buffer;
8565 {
8566   return code_convert_string (string, coding_system, buffer,
8567                               1, ! NILP (nocopy), 1);
8568 }
8569
8570 \f
8571 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
8572        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8573 Return the corresponding character.  */)
8574      (code)
8575      Lisp_Object code;
8576 {
8577   Lisp_Object spec, attrs, val;
8578   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8579   int c;
8580
8581   CHECK_NATNUM (code);
8582   c = XFASTINT (code);
8583   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8584   attrs = AREF (spec, 0);
8585
8586   if (ASCII_BYTE_P (c)
8587       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8588     return code;
8589
8590   val = CODING_ATTR_CHARSET_LIST (attrs);
8591   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8592   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8593   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
8594
8595   if (c <= 0x7F)
8596     charset = charset_roman;
8597   else if (c >= 0xA0 && c < 0xDF)
8598     {
8599       charset = charset_kana;
8600       c -= 0x80;
8601     }
8602   else
8603     {
8604       int s1 = c >> 8, s2 = c & 0xFF;
8605
8606       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8607           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8608         error ("Invalid code: %d", code);
8609       SJIS_TO_JIS (c);
8610       charset = charset_kanji;
8611     }
8612   c = DECODE_CHAR (charset, c);
8613   if (c < 0)
8614     error ("Invalid code: %d", code);
8615   return make_number (c);
8616 }
8617
8618
8619 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8620        doc: /* Encode a Japanese character CH to shift_jis encoding.
8621 Return the corresponding code in SJIS.  */)
8622      (ch)
8623     Lisp_Object ch;
8624 {
8625   Lisp_Object spec, attrs, charset_list;
8626   int c;
8627   struct charset *charset;
8628   unsigned code;
8629
8630   CHECK_CHARACTER (ch);
8631   c = XFASTINT (ch);
8632   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8633   attrs = AREF (spec, 0);
8634
8635   if (ASCII_CHAR_P (c)
8636       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8637     return ch;
8638
8639   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8640   charset = char_charset (c, charset_list, &code);
8641   if (code == CHARSET_INVALID_CODE (charset))
8642     error ("Can't encode by shift_jis encoding: %d", c);
8643   JIS_TO_SJIS (code);
8644
8645   return make_number (code);
8646 }
8647
8648 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8649        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8650 Return the corresponding character.  */)
8651      (code)
8652      Lisp_Object code;
8653 {
8654   Lisp_Object spec, attrs, val;
8655   struct charset *charset_roman, *charset_big5, *charset;
8656   int c;
8657
8658   CHECK_NATNUM (code);
8659   c = XFASTINT (code);
8660   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8661   attrs = AREF (spec, 0);
8662
8663   if (ASCII_BYTE_P (c)
8664       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8665     return code;
8666
8667   val = CODING_ATTR_CHARSET_LIST (attrs);
8668   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8669   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8670
8671   if (c <= 0x7F)
8672     charset = charset_roman;
8673   else
8674     {
8675       int b1 = c >> 8, b2 = c & 0x7F;
8676       if (b1 < 0xA1 || b1 > 0xFE
8677           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8678         error ("Invalid code: %d", code);
8679       charset = charset_big5;
8680     }
8681   c = DECODE_CHAR (charset, (unsigned )c);
8682   if (c < 0)
8683     error ("Invalid code: %d", code);
8684   return make_number (c);
8685 }
8686
8687 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8688        doc: /* Encode the Big5 character CH to BIG5 coding system.
8689 Return the corresponding character code in Big5.  */)
8690      (ch)
8691      Lisp_Object ch;
8692 {
8693   Lisp_Object spec, attrs, charset_list;
8694   struct charset *charset;
8695   int c;
8696   unsigned code;
8697
8698   CHECK_CHARACTER (ch);
8699   c = XFASTINT (ch);
8700   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8701   attrs = AREF (spec, 0);
8702   if (ASCII_CHAR_P (c)
8703       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8704     return ch;
8705
8706   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8707   charset = char_charset (c, charset_list, &code);
8708   if (code == CHARSET_INVALID_CODE (charset))
8709     error ("Can't encode by Big5 encoding: %d", c);
8710
8711   return make_number (code);
8712 }
8713
8714 \f
8715 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
8716        Sset_terminal_coding_system_internal, 1, 2, 0,
8717        doc: /* Internal use only.  */)
8718      (coding_system, terminal)
8719      Lisp_Object coding_system;
8720      Lisp_Object terminal;
8721 {
8722   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8723   CHECK_SYMBOL (coding_system);
8724   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
8725   /* We had better not send unsafe characters to terminal.  */
8726   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
8727   /* Characer composition should be disabled.  */
8728   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8729   terminal_coding->src_multibyte = 1;
8730   terminal_coding->dst_multibyte = 0;
8731   return Qnil;
8732 }
8733
8734 DEFUN ("set-safe-terminal-coding-system-internal",
8735        Fset_safe_terminal_coding_system_internal,
8736        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8737        doc: /* Internal use only.  */)
8738      (coding_system)
8739      Lisp_Object coding_system;
8740 {
8741   CHECK_SYMBOL (coding_system);
8742   setup_coding_system (Fcheck_coding_system (coding_system),
8743                        &safe_terminal_coding);
8744   /* Characer composition should be disabled.  */
8745   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8746   safe_terminal_coding.src_multibyte = 1;
8747   safe_terminal_coding.dst_multibyte = 0;
8748   return Qnil;
8749 }
8750
8751 DEFUN ("terminal-coding-system", Fterminal_coding_system,
8752        Sterminal_coding_system, 0, 1, 0,
8753        doc: /* Return coding system specified for terminal output on the given terminal.
8754 TERMINAL may be a terminal id, a frame, or nil for the selected
8755 frame's terminal device.  */)
8756      (terminal)
8757      Lisp_Object terminal;
8758 {
8759   struct coding_system *terminal_coding
8760     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8761   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
8762
8763   /* For backward compatibility, return nil if it is `undecided'. */
8764   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
8765 }
8766
8767 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
8768        Sset_keyboard_coding_system_internal, 1, 2, 0,
8769        doc: /* Internal use only.  */)
8770      (coding_system, terminal)
8771      Lisp_Object coding_system;
8772      Lisp_Object terminal;
8773 {
8774   struct terminal *t = get_terminal (terminal, 1);
8775   CHECK_SYMBOL (coding_system);
8776   setup_coding_system (Fcheck_coding_system (coding_system),
8777                        TERMINAL_KEYBOARD_CODING (t));
8778   /* Characer composition should be disabled.  */
8779   TERMINAL_KEYBOARD_CODING (t)->common_flags
8780     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8781   return Qnil;
8782 }
8783
8784 DEFUN ("keyboard-coding-system",
8785        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
8786        doc: /* Return coding system specified for decoding keyboard input.  */)
8787      (terminal)
8788      Lisp_Object terminal;
8789 {
8790   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8791                          (get_terminal (terminal, 1))->id);
8792 }
8793
8794 \f
8795 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8796        Sfind_operation_coding_system,  1, MANY, 0,
8797        doc: /* Choose a coding system for an operation based on the target name.
8798 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8799 DECODING-SYSTEM is the coding system to use for decoding
8800 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8801 for encoding (in case OPERATION does encoding).
8802
8803 The first argument OPERATION specifies an I/O primitive:
8804   For file I/O, `insert-file-contents' or `write-region'.
8805   For process I/O, `call-process', `call-process-region', or `start-process'.
8806   For network I/O, `open-network-stream'.
8807
8808 The remaining arguments should be the same arguments that were passed
8809 to the primitive.  Depending on which primitive, one of those arguments
8810 is selected as the TARGET.  For example, if OPERATION does file I/O,
8811 whichever argument specifies the file name is TARGET.
8812
8813 TARGET has a meaning which depends on OPERATION:
8814   For file I/O, TARGET is a file name (except for the special case below).
8815   For process I/O, TARGET is a process name.
8816   For network I/O, TARGET is a service name or a port number.
8817
8818 This function looks up what is specified for TARGET in
8819 `file-coding-system-alist', `process-coding-system-alist',
8820 or `network-coding-system-alist' depending on OPERATION.
8821 They may specify a coding system, a cons of coding systems,
8822 or a function symbol to call.
8823 In the last case, we call the function with one argument,
8824 which is a list of all the arguments given to this function.
8825 If the function can't decide a coding system, it can return
8826 `undecided' so that the normal code-detection is performed.
8827
8828 If OPERATION is `insert-file-contents', the argument corresponding to
8829 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
8830 file name to look up, and BUFFER is a buffer that contains the file's
8831 contents (not yet decoded).  If `file-coding-system-alist' specifies a
8832 function to call for FILENAME, that function should examine the
8833 contents of BUFFER instead of reading the file.
8834
8835 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
8836      (nargs, args)
8837      int nargs;
8838      Lisp_Object *args;
8839 {
8840   Lisp_Object operation, target_idx, target, val;
8841   register Lisp_Object chain;
8842
8843   if (nargs < 2)
8844     error ("Too few arguments");
8845   operation = args[0];
8846   if (!SYMBOLP (operation)
8847       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8848     error ("Invalid first argument");
8849   if (nargs < 1 + XINT (target_idx))
8850     error ("Too few arguments for operation: %s",
8851            SDATA (SYMBOL_NAME (operation)));
8852   target = args[XINT (target_idx) + 1];
8853   if (!(STRINGP (target)
8854         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8855             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
8856         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8857     error ("Invalid %dth argument", XINT (target_idx) + 1);
8858   if (CONSP (target))
8859     target = XCAR (target);
8860
8861   chain = ((EQ (operation, Qinsert_file_contents)
8862             || EQ (operation, Qwrite_region))
8863            ? Vfile_coding_system_alist
8864            : (EQ (operation, Qopen_network_stream)
8865               ? Vnetwork_coding_system_alist
8866               : Vprocess_coding_system_alist));
8867   if (NILP (chain))
8868     return Qnil;
8869
8870   for (; CONSP (chain); chain = XCDR (chain))
8871     {
8872       Lisp_Object elt;
8873
8874       elt = XCAR (chain);
8875       if (CONSP (elt)
8876           && ((STRINGP (target)
8877                && STRINGP (XCAR (elt))
8878                && fast_string_match (XCAR (elt), target) >= 0)
8879               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8880         {
8881           val = XCDR (elt);
8882           /* Here, if VAL is both a valid coding system and a valid
8883              function symbol, we return VAL as a coding system.  */
8884           if (CONSP (val))
8885             return val;
8886           if (! SYMBOLP (val))
8887             return Qnil;
8888           if (! NILP (Fcoding_system_p (val)))
8889             return Fcons (val, val);
8890           if (! NILP (Ffboundp (val)))
8891             {
8892               /* We use call1 rather than safe_call1
8893                  so as to get bug reports about functions called here
8894                  which don't handle the current interface.  */
8895               val = call1 (val, Flist (nargs, args));
8896               if (CONSP (val))
8897                 return val;
8898               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8899                 return Fcons (val, val);
8900             }
8901           return Qnil;
8902         }
8903     }
8904   return Qnil;
8905 }
8906
8907 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8908        Sset_coding_system_priority, 0, MANY, 0,
8909        doc: /* Assign higher priority to the coding systems given as arguments.
8910 If multiple coding systems belong to the same category,
8911 all but the first one are ignored.
8912
8913 usage: (set-coding-system-priority &rest coding-systems)  */)
8914      (nargs, args)
8915      int nargs;
8916      Lisp_Object *args;
8917 {
8918   int i, j;
8919   int changed[coding_category_max];
8920   enum coding_category priorities[coding_category_max];
8921
8922   bzero (changed, sizeof changed);
8923
8924   for (i = j = 0; i < nargs; i++)
8925     {
8926       enum coding_category category;
8927       Lisp_Object spec, attrs;
8928
8929       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8930       attrs = AREF (spec, 0);
8931       category = XINT (CODING_ATTR_CATEGORY (attrs));
8932       if (changed[category])
8933         /* Ignore this coding system because a coding system of the
8934            same category already had a higher priority.  */
8935         continue;
8936       changed[category] = 1;
8937       priorities[j++] = category;
8938       if (coding_categories[category].id >= 0
8939           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8940         setup_coding_system (args[i], &coding_categories[category]);
8941       Fset (AREF (Vcoding_category_table, category), args[i]);
8942     }
8943
8944   /* Now we have decided top J priorities.  Reflect the order of the
8945      original priorities to the remaining priorities.  */
8946
8947   for (i = j, j = 0; i < coding_category_max; i++, j++)
8948     {
8949       while (j < coding_category_max
8950              && changed[coding_priorities[j]])
8951         j++;
8952       if (j == coding_category_max)
8953         abort ();
8954       priorities[i] = coding_priorities[j];
8955     }
8956
8957   bcopy (priorities, coding_priorities, sizeof priorities);
8958
8959   /* Update `coding-category-list'.  */
8960   Vcoding_category_list = Qnil;
8961   for (i = coding_category_max - 1; i >= 0; i--)
8962     Vcoding_category_list
8963       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8964                Vcoding_category_list);
8965
8966   return Qnil;
8967 }
8968
8969 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8970        Scoding_system_priority_list, 0, 1, 0,
8971        doc: /* Return a list of coding systems ordered by their priorities.
8972 HIGHESTP non-nil means just return the highest priority one.  */)
8973      (highestp)
8974      Lisp_Object highestp;
8975 {
8976   int i;
8977   Lisp_Object val;
8978
8979   for (i = 0, val = Qnil; i < coding_category_max; i++)
8980     {
8981       enum coding_category category = coding_priorities[i];
8982       int id = coding_categories[category].id;
8983       Lisp_Object attrs;
8984
8985       if (id < 0)
8986         continue;
8987       attrs = CODING_ID_ATTRS (id);
8988       if (! NILP (highestp))
8989         return CODING_ATTR_BASE_NAME (attrs);
8990       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8991     }
8992   return Fnreverse (val);
8993 }
8994
8995 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8996
8997 static Lisp_Object
8998 make_subsidiaries (base)
8999      Lisp_Object base;
9000 {
9001   Lisp_Object subsidiaries;
9002   int base_name_len = SBYTES (SYMBOL_NAME (base));
9003   char *buf = (char *) alloca (base_name_len + 6);
9004   int i;
9005
9006   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9007   subsidiaries = Fmake_vector (make_number (3), Qnil);
9008   for (i = 0; i < 3; i++)
9009     {
9010       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9011       ASET (subsidiaries, i, intern (buf));
9012     }
9013   return subsidiaries;
9014 }
9015
9016
/* Define a coding system from the positional arguments ARGS, which are
   indexed by the coding_arg_* constants.  Build the attribute vector,
   determine the coding category from the coding type, and register
   the name in Vcoding_system_hash_table, Vcoding_system_list and
   Vcoding_system_alist.  When the eol-type argument is nil, the three
   names produced by make_subsidiaries are registered in the same way.
   Signal wrong-number-of-arguments if NARGS is too small for the
   requested coding type.  Return nil.  */

DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
       Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
       doc: /* For internal use only.
usage: (define-coding-system-internal ...)  */)
     (nargs, args)
     int nargs;
     Lisp_Object *args;
{
  Lisp_Object name;
  Lisp_Object spec_vec;         /* [ ATTRS ALIASES EOL_TYPE ] */
  Lisp_Object attrs;            /* Vector of attributes.  */
  Lisp_Object eol_type;
  Lisp_Object aliases;
  Lisp_Object coding_type, charset_list, safe_charsets;
  enum coding_category category;
  Lisp_Object tail, val;
  int max_charset_id = 0;
  int i;

  /* Every coding type needs at least the common arguments.  */
  if (nargs < coding_arg_max)
    goto short_args;

  attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);

  name = args[coding_arg_name];
  CHECK_SYMBOL (name);
  CODING_ATTR_BASE_NAME (attrs) = name;

  /* The mnemonic is either a string or a character.  */
  val = args[coding_arg_mnemonic];
  if (! STRINGP (val))
    CHECK_CHARACTER (val);
  CODING_ATTR_MNEMONIC (attrs) = val;

  coding_type = args[coding_arg_coding_type];
  CHECK_SYMBOL (coding_type);
  CODING_ATTR_TYPE (attrs) = coding_type;

  charset_list = args[coding_arg_charset_list];
  if (SYMBOLP (charset_list))
    {
      /* The symbols `iso-2022' and `emacs-mule' select a predefined
         list, and are accepted only for the coding type of the same
         name.  */
      if (EQ (charset_list, Qiso_2022))
        {
          if (! EQ (coding_type, Qiso_2022))
            error ("Invalid charset-list");
          charset_list = Viso_2022_charset_list;
        }
      else if (EQ (charset_list, Qemacs_mule))
        {
          if (! EQ (coding_type, Qemacs_mule))
            error ("Invalid charset-list");
          charset_list = Vemacs_mule_charset_list;
        }
      /* The elements are read as integers here; find the largest.  */
      for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
        if (max_charset_id < XFASTINT (XCAR (tail)))
          max_charset_id = XFASTINT (XCAR (tail));
    }
  else
    {
      /* An explicit list: work on a copy, replacing each element by
         the ID of the charset it names and rejecting charsets that
         the coding type cannot handle.  */
      charset_list = Fcopy_sequence (charset_list);
      for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
        {
          struct charset *charset;

          val = XCAR (tail);
          CHECK_CHARSET_GET_CHARSET (val, charset);
          if (EQ (coding_type, Qiso_2022)
              ? CHARSET_ISO_FINAL (charset) < 0
              : EQ (coding_type, Qemacs_mule)
              ? CHARSET_EMACS_MULE_ID (charset) < 0
              : 0)
            error ("Can't handle charset `%s'",
                   SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));

          XSETCAR (tail, make_number (charset->id));
          if (max_charset_id < charset->id)
            max_charset_id = charset->id;
        }
    }
  CODING_ATTR_CHARSET_LIST (attrs) = charset_list;

  /* SAFE_CHARSETS is a string indexed by charset ID: 0 for the IDs in
     CHARSET_LIST, 255 for all others.  */
  safe_charsets = Fmake_string (make_number (max_charset_id + 1),
                                make_number (255));
  for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
    SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
  CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;

  CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];

  /* Translation tables may be a char-table, a cons, or a symbol.  */
  val = args[coding_arg_decode_translation_table];
  if (! CHAR_TABLE_P (val) && ! CONSP (val))
    CHECK_SYMBOL (val);
  CODING_ATTR_DECODE_TBL (attrs) = val;

  val = args[coding_arg_encode_translation_table];
  if (! CHAR_TABLE_P (val) && ! CONSP (val))
    CHECK_SYMBOL (val);
  CODING_ATTR_ENCODE_TBL (attrs) = val;

  val = args[coding_arg_post_read_conversion];
  CHECK_SYMBOL (val);
  CODING_ATTR_POST_READ (attrs) = val;

  val = args[coding_arg_pre_write_conversion];
  CHECK_SYMBOL (val);
  CODING_ATTR_PRE_WRITE (attrs) = val;

  /* A nil default character means a space.  */
  val = args[coding_arg_default_char];
  if (NILP (val))
    CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
  else
    {
      CHECK_CHARACTER (val);
      CODING_ATTR_DEFAULT_CHAR (attrs) = val;
    }

  /* Normalize the for-unibyte flag to nil or t.  */
  val = args[coding_arg_for_unibyte];
  CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;

  val = args[coding_arg_plist];
  CHECK_LIST (val);
  CODING_ATTR_PLIST (attrs) = val;

  /* The rest of the attributes, and the category, depend on the
     coding type.  */
  if (EQ (coding_type, Qcharset))
    {
      /* Generate a lisp vector of 256 elements.  Each element is nil,
         integer, or a list of charset IDs.

         If Nth element is nil, the byte code N is invalid in this
         coding system.

         If Nth element is a number NUM, N is the first byte of a
         charset whose ID is NUM.

         If Nth element is a list of charset IDs, N is the first byte
         of one of them.  The list is sorted by dimensions of the
         charsets.  A charset of smaller dimension comes first. */
      val = Fmake_vector (make_number (256), Qnil);

      for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
        {
          struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
          int dim = CHARSET_DIMENSION (charset);
          int idx = (dim - 1) * 4;

          if (CHARSET_ASCII_COMPATIBLE_P (charset))
            CODING_ATTR_ASCII_COMPAT (attrs) = Qt;

          /* Visit the byte range code_space[IDX] .. code_space[IDX + 1]
             of this charset.  */
          for (i = charset->code_space[idx];
               i <= charset->code_space[idx + 1]; i++)
            {
              Lisp_Object tmp, tmp2;
              int dim2;

              tmp = AREF (val, i);
              if (NILP (tmp))
                tmp = XCAR (tail);
              else if (NUMBERP (tmp))
                {
                  /* A second charset for this byte: make a two-element
                     list ordered by dimension.  */
                  dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
                  if (dim < dim2)
                    tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
                  else
                    tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
                }
              else
                {
                  /* Already a list: find the first entry of larger
                     dimension and insert before it, or append.  */
                  for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
                    {
                      dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
                      if (dim < dim2)
                        break;
                    }
                  if (NILP (tmp2))
                    tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
                  else
                    {
                      /* Insert in place: duplicate the cell at TMP2,
                         then overwrite its car with the new ID.  */
                      XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
                      XSETCAR (tmp2, XCAR (tail));
                    }
                }
              ASET (val, i, tmp);
            }
        }
      ASET (attrs, coding_attr_charset_valids, val);
      category = coding_category_charset;
    }
  else if (EQ (coding_type, Qccl))
    {
      Lisp_Object valids;

      if (nargs < coding_arg_ccl_max)
        goto short_args;

      /* CCL programs given as vectors are copied before being stored.  */
      val = args[coding_arg_ccl_decoder];
      CHECK_CCL_PROGRAM (val);
      if (VECTORP (val))
        val = Fcopy_sequence (val);
      ASET (attrs, coding_attr_ccl_decoder, val);

      val = args[coding_arg_ccl_encoder];
      CHECK_CCL_PROGRAM (val);
      if (VECTORP (val))
        val = Fcopy_sequence (val);
      ASET (attrs, coding_attr_ccl_encoder, val);

      /* VALIDS is a 256-byte string in which byte N is 1 if N is
         listed in the valids argument, either as an integer or
         within a (FROM . TO) range, and 0 otherwise.  */
      val = args[coding_arg_ccl_valids];
      valids = Fmake_string (make_number (256), make_number (0));
      for (tail = val; !NILP (tail); tail = Fcdr (tail))
        {
          int from, to;

          val = Fcar (tail);
          if (INTEGERP (val))
            {
              from = to = XINT (val);
              if (from < 0 || from > 255)
                args_out_of_range_3 (val, make_number (0), make_number (255));
            }
          else
            {
              CHECK_CONS (val);
              CHECK_NATNUM_CAR (val);
              CHECK_NATNUM_CDR (val);
              from = XINT (XCAR (val));
              if (from > 255)
                args_out_of_range_3 (XCAR (val),
                                     make_number (0), make_number (255));
              to = XINT (XCDR (val));
              if (to < from || to > 255)
                args_out_of_range_3 (XCDR (val),
                                     XCAR (val), make_number (255));
            }
          for (i = from; i <= to; i++)
            SSET (valids, i, 1);
        }
      ASET (attrs, coding_attr_ccl_valids, valids);

      category = coding_category_ccl;
    }
  else if (EQ (coding_type, Qutf_16))
    {
      Lisp_Object bom, endian;

      CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;

      if (nargs < coding_arg_utf16_max)
        goto short_args;

      /* BOM is nil, t, or a cons of two coding systems.  */
      bom = args[coding_arg_utf16_bom];
      if (! NILP (bom) && ! EQ (bom, Qt))
        {
          CHECK_CONS (bom);
          val = XCAR (bom);
          CHECK_CODING_SYSTEM (val);
          val = XCDR (bom);
          CHECK_CODING_SYSTEM (val);
        }
      ASET (attrs, coding_attr_utf_bom, bom);

      /* A nil endian means big; otherwise it must be `big' or
         `little'.  */
      endian = args[coding_arg_utf16_endian];
      CHECK_SYMBOL (endian);
      if (NILP (endian))
        endian = Qbig;
      else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
        error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
      ASET (attrs, coding_attr_utf_16_endian, endian);

      /* A cons BOM selects the auto category; otherwise the category
         follows from the BOM flag and the endianness.  */
      category = (CONSP (bom)
                  ? coding_category_utf_16_auto
                  : NILP (bom)
                  ? (EQ (endian, Qbig)
                     ? coding_category_utf_16_be_nosig
                     : coding_category_utf_16_le_nosig)
                  : (EQ (endian, Qbig)
                     ? coding_category_utf_16_be
                     : coding_category_utf_16_le));
    }
  else if (EQ (coding_type, Qiso_2022))
    {
      Lisp_Object initial, reg_usage, request, flags;
      /* This I shadows the function-level I; below it holds the
         integer value of FLAGS as passed in.  */
      int i;

      if (nargs < coding_arg_iso2022_max)
        goto short_args;

      /* In a copy of the initial vector, replace each of the first
         four elements by a charset ID, or by -1 where it is nil.  */
      initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
      CHECK_VECTOR (initial);
      for (i = 0; i < 4; i++)
        {
          val = Faref (initial, make_number (i));
          if (! NILP (val))
            {
              struct charset *charset;

              CHECK_CHARSET_GET_CHARSET (val, charset);
              ASET (initial, i, make_number (CHARSET_ID (charset)));
              if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
                CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
            }
          else
            ASET (initial, i, make_number (-1));
        }

      reg_usage = args[coding_arg_iso2022_reg_usage];
      CHECK_CONS (reg_usage);
      CHECK_NUMBER_CAR (reg_usage);
      CHECK_NUMBER_CDR (reg_usage);

      /* Each element of the request list is (CHARSET . REG) with REG
         below 4.  Only the list spine is copied, so the XSETCAR below
         modifies the element conses of the argument itself.  */
      request = Fcopy_sequence (args[coding_arg_iso2022_request]);
      for (tail = request; ! NILP (tail); tail = Fcdr (tail))
        {
          int id;
          Lisp_Object tmp;

          val = Fcar (tail);
          CHECK_CONS (val);
          tmp = XCAR (val);
          CHECK_CHARSET_GET_ID (tmp, id);
          CHECK_NATNUM_CDR (val);
          if (XINT (XCDR (val)) >= 4)
            error ("Invalid graphic register number: %d", XINT (XCDR (val)));
          XSETCAR (val, make_number (id));
        }

      /* The stored flags gain CODING_ISO_FLAG_FULL_SUPPORT when the
         charset-list argument was the symbol `iso-2022'; I keeps the
         value without that addition.  */
      flags = args[coding_arg_iso2022_flags];
      CHECK_NATNUM (flags);
      i = XINT (flags);
      if (EQ (args[coding_arg_charset_list], Qiso_2022))
        flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);

      ASET (attrs, coding_attr_iso_initial, initial);
      ASET (attrs, coding_attr_iso_usage, reg_usage);
      ASET (attrs, coding_attr_iso_request, request);
      ASET (attrs, coding_attr_iso_flags, flags);
      setup_iso_safe_charsets (attrs);

      /* Choose among the ISO categories from the flag bits, the
         charset-list argument and the second initial designation.  */
      if (i & CODING_ISO_FLAG_SEVEN_BITS)
        category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
                          | CODING_ISO_FLAG_SINGLE_SHIFT))
                    ? coding_category_iso_7_else
                    : EQ (args[coding_arg_charset_list], Qiso_2022)
                    ? coding_category_iso_7
                    : coding_category_iso_7_tight);
      else
        {
          int id = XINT (AREF (initial, 1));

          category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
                       || EQ (args[coding_arg_charset_list], Qiso_2022)
                       || id < 0)
                      ? coding_category_iso_8_else
                      : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
                      ? coding_category_iso_8_1
                      : coding_category_iso_8_2);
        }
      /* Only the iso-8-1 and iso-8-2 categories keep the ASCII
         compatibility computed so far.  */
      if (category != coding_category_iso_8_1
          && category != coding_category_iso_8_2)
        CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
    }
  else if (EQ (coding_type, Qemacs_mule))
    {
      if (EQ (args[coding_arg_charset_list], Qemacs_mule))
        ASET (attrs, coding_attr_emacs_mule_full, Qt);
      CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
      category = coding_category_emacs_mule;
    }
  else if (EQ (coding_type, Qshift_jis))
    {

      struct charset *charset;

      /* The list must hold three or four charsets whose dimensions
         are 1, 1, 2 and, if present, 2.  CHARSET_LIST is advanced
         below; the attribute vector already holds the full list.  */
      if (XINT (Flength (charset_list)) != 3
          && XINT (Flength (charset_list)) != 4)
        error ("There should be three or four charsets");

      charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
      if (CHARSET_DIMENSION (charset) != 1)
        error ("Dimension of charset %s is not one",
               SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
      if (CHARSET_ASCII_COMPATIBLE_P (charset))
        CODING_ATTR_ASCII_COMPAT (attrs) = Qt;

      charset_list = XCDR (charset_list);
      charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
      if (CHARSET_DIMENSION (charset) != 1)
        error ("Dimension of charset %s is not one",
               SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));

      charset_list = XCDR (charset_list);
      charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
      if (CHARSET_DIMENSION (charset) != 2)
        error ("Dimension of charset %s is not two",
               SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));

      charset_list = XCDR (charset_list);
      if (! NILP (charset_list))
        {
          charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
          if (CHARSET_DIMENSION (charset) != 2)
            error ("Dimension of charset %s is not two",
                   SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
        }

      category = coding_category_sjis;
      Vsjis_coding_system = name;
    }
  else if (EQ (coding_type, Qbig5))
    {
      struct charset *charset;

      /* Exactly two charsets, of dimension 1 and 2 respectively.  */
      if (XINT (Flength (charset_list)) != 2)
        error ("There should be just two charsets");

      charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
      if (CHARSET_DIMENSION (charset) != 1)
        error ("Dimension of charset %s is not one",
               SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
      if (CHARSET_ASCII_COMPATIBLE_P (charset))
        CODING_ATTR_ASCII_COMPAT (attrs) = Qt;

      charset_list = XCDR (charset_list);
      charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
      if (CHARSET_DIMENSION (charset) != 2)
        error ("Dimension of charset %s is not two",
               SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));

      category = coding_category_big5;
      Vbig5_coding_system = name;
    }
  else if (EQ (coding_type, Qraw_text))
    {
      category = coding_category_raw_text;
      CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
    }
  else if (EQ (coding_type, Qutf_8))
    {
      Lisp_Object bom;

      CODING_ATTR_ASCII_COMPAT (attrs) = Qt;

      if (nargs < coding_arg_utf8_max)
        goto short_args;

      /* BOM is nil, t, or a cons of two coding systems.  */
      bom = args[coding_arg_utf8_bom];
      if (! NILP (bom) && ! EQ (bom, Qt))
        {
          CHECK_CONS (bom);
          val = XCAR (bom);
          CHECK_CODING_SYSTEM (val);
          val = XCDR (bom);
          CHECK_CODING_SYSTEM (val);
        }
      ASET (attrs, coding_attr_utf_bom, bom);

      category = (CONSP (bom) ? coding_category_utf_8_auto
                  : NILP (bom) ? coding_category_utf_8_nosig
                  : coding_category_utf_8_sig);
    }
  else if (EQ (coding_type, Qundecided))
    category = coding_category_undecided;
  else
    error ("Invalid coding system type: %s",
           SDATA (SYMBOL_NAME (coding_type)));

  /* Record the category in ATTRS, and push the category symbol and
     the ASCII compatibility onto the front of the property list.  */
  CODING_ATTR_CATEGORY (attrs) = make_number (category);
  CODING_ATTR_PLIST (attrs)
    = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
                                CODING_ATTR_PLIST (attrs)));
  CODING_ATTR_PLIST (attrs)
    = Fcons (QCascii_compatible_p,
             Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
                    CODING_ATTR_PLIST (attrs)));

  eol_type = args[coding_arg_eol_type];
  if (! NILP (eol_type)
      && ! EQ (eol_type, Qunix)
      && ! EQ (eol_type, Qdos)
      && ! EQ (eol_type, Qmac))
    error ("Invalid eol-type");

  aliases = Fcons (name, Qnil);

  if (NILP (eol_type))
    {
      /* No eol-type given: the eol-type becomes the vector of names
         returned by make_subsidiaries, and each of those names is
         registered with a spec that shares ATTRS and has a fixed
         eol-type (unix, dos, mac in that order).  */
      eol_type = make_subsidiaries (name);
      for (i = 0; i < 3; i++)
        {
          Lisp_Object this_spec, this_name, this_aliases, this_eol_type;

          this_name = AREF (eol_type, i);
          this_aliases = Fcons (this_name, Qnil);
          this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
          this_spec = Fmake_vector (make_number (3), attrs);
          ASET (this_spec, 1, this_aliases);
          ASET (this_spec, 2, this_eol_type);
          Fputhash (this_name, this_spec, Vcoding_system_hash_table);
          Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
          val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
          if (NILP (val))
            Vcoding_system_alist
              = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
                       Vcoding_system_alist);
        }
    }

  /* Fmake_vector fills all three slots with ATTRS; slots 1 and 2 are
     then overwritten with the aliases and the eol-type.  */
  spec_vec = Fmake_vector (make_number (3), attrs);
  ASET (spec_vec, 1, aliases);
  ASET (spec_vec, 2, eol_type);

  Fputhash (name, spec_vec, Vcoding_system_hash_table);
  Vcoding_system_list = Fcons (name, Vcoding_system_list);
  val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
  if (NILP (val))
    Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
                                  Vcoding_system_alist);

  {
    int id = coding_categories[category].id;

    /* Set up the category's coding structure from NAME when the
       category has none yet (negative id) or already uses NAME.  */
    if (id < 0 || EQ (name, CODING_ID_NAME (id)))
      setup_coding_system (name, &coding_categories[category]);
  }

  return Qnil;

  /* Reached when ARGS is too short for the coding type.  */
 short_args:
  return Fsignal (Qwrong_number_of_arguments,
                  Fcons (intern ("define-coding-system-internal"),
                         make_number (nargs)));
}
9547
9548
9549 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9550        3, 3, 0,
9551        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
9552   (coding_system, prop, val)
9553      Lisp_Object coding_system, prop, val;
9554 {
9555   Lisp_Object spec, attrs;
9556
9557   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9558   attrs = AREF (spec, 0);
9559   if (EQ (prop, QCmnemonic))
9560     {
9561       if (! STRINGP (val))
9562         CHECK_CHARACTER (val);
9563       CODING_ATTR_MNEMONIC (attrs) = val;
9564     }
9565   else if (EQ (prop, QCdefalut_char))
9566     {
9567       if (NILP (val))
9568         val = make_number (' ');
9569       else
9570         CHECK_CHARACTER (val);
9571       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9572     }
9573   else if (EQ (prop, QCdecode_translation_table))
9574     {
9575       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9576         CHECK_SYMBOL (val);
9577       CODING_ATTR_DECODE_TBL (attrs) = val;
9578     }
9579   else if (EQ (prop, QCencode_translation_table))
9580     {
9581       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9582         CHECK_SYMBOL (val);
9583       CODING_ATTR_ENCODE_TBL (attrs) = val;
9584     }
9585   else if (EQ (prop, QCpost_read_conversion))
9586     {
9587       CHECK_SYMBOL (val);
9588       CODING_ATTR_POST_READ (attrs) = val;
9589     }
9590   else if (EQ (prop, QCpre_write_conversion))
9591     {
9592       CHECK_SYMBOL (val);
9593       CODING_ATTR_PRE_WRITE (attrs) = val;
9594     }
9595   else if (EQ (prop, QCascii_compatible_p))
9596     {
9597       CODING_ATTR_ASCII_COMPAT (attrs) = val;
9598     }
9599
9600   CODING_ATTR_PLIST (attrs)
9601     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9602   return val;
9603 }
9604
9605
9606 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9607        Sdefine_coding_system_alias, 2, 2, 0,
9608        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
9609      (alias, coding_system)
9610      Lisp_Object alias, coding_system;
9611 {
9612   Lisp_Object spec, aliases, eol_type, val;
9613
9614   CHECK_SYMBOL (alias);
9615   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9616   aliases = AREF (spec, 1);
9617   /* ALIASES should be a list of length more than zero, and the first
9618      element is a base coding system.  Append ALIAS at the tail of the
9619      list.  */
9620   while (!NILP (XCDR (aliases)))
9621     aliases = XCDR (aliases);
9622   XSETCDR (aliases, Fcons (alias, Qnil));
9623
9624   eol_type = AREF (spec, 2);
9625   if (VECTORP (eol_type))
9626     {
9627       Lisp_Object subsidiaries;
9628       int i;
9629
9630       subsidiaries = make_subsidiaries (alias);
9631       for (i = 0; i < 3; i++)
9632         Fdefine_coding_system_alias (AREF (subsidiaries, i),
9633                                      AREF (eol_type, i));
9634     }
9635
9636   Fputhash (alias, spec, Vcoding_system_hash_table);
9637   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
9638   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9639   if (NILP (val))
9640     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9641                                   Vcoding_system_alist);
9642
9643   return Qnil;
9644 }
9645
9646 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9647        1, 1, 0,
9648        doc: /* Return the base of CODING-SYSTEM.
9649 Any alias or subsidiary coding system is not a base coding system.  */)
9650   (coding_system)
9651      Lisp_Object coding_system;
9652 {
9653   Lisp_Object spec, attrs;
9654
9655   if (NILP (coding_system))
9656     return (Qno_conversion);
9657   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9658   attrs = AREF (spec, 0);
9659   return CODING_ATTR_BASE_NAME (attrs);
9660 }
9661
9662 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9663        1, 1, 0,
9664        doc: "Return the property list of CODING-SYSTEM.")
9665      (coding_system)
9666      Lisp_Object coding_system;
9667 {
9668   Lisp_Object spec, attrs;
9669
9670   if (NILP (coding_system))
9671     coding_system = Qno_conversion;
9672   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9673   attrs = AREF (spec, 0);
9674   return CODING_ATTR_PLIST (attrs);
9675 }
9676
9677
9678 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9679        1, 1, 0,
9680        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
9681      (coding_system)
9682      Lisp_Object coding_system;
9683 {
9684   Lisp_Object spec;
9685
9686   if (NILP (coding_system))
9687     coding_system = Qno_conversion;
9688   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9689   return AREF (spec, 1);
9690 }
9691
9692 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9693        Scoding_system_eol_type, 1, 1, 0,
9694        doc: /* Return eol-type of CODING-SYSTEM.
9695 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
9696
9697 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9698 and CR respectively.
9699
9700 A vector value indicates that a format of end-of-line should be
9701 detected automatically.  Nth element of the vector is the subsidiary
9702 coding system whose eol-type is N.  */)
9703      (coding_system)
9704      Lisp_Object coding_system;
9705 {
9706   Lisp_Object spec, eol_type;
9707   int n;
9708
9709   if (NILP (coding_system))
9710     coding_system = Qno_conversion;
9711   if (! CODING_SYSTEM_P (coding_system))
9712     return Qnil;
9713   spec = CODING_SYSTEM_SPEC (coding_system);
9714   eol_type = AREF (spec, 2);
9715   if (VECTORP (eol_type))
9716     return Fcopy_sequence (eol_type);
9717   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9718   return make_number (n);
9719 }
9720
9721 #endif /* emacs */
9722
9723 \f
/*** 12. Postamble ***/
9725
9726 void
9727 init_coding_once ()
9728 {
9729   int i;
9730
9731   for (i = 0; i < coding_category_max; i++)
9732     {
9733       coding_categories[i].id = -1;
9734       coding_priorities[i] = i;
9735     }
9736
9737   /* ISO2022 specific initialize routine.  */
9738   for (i = 0; i < 0x20; i++)
9739     iso_code_class[i] = ISO_control_0;
9740   for (i = 0x21; i < 0x7F; i++)
9741     iso_code_class[i] = ISO_graphic_plane_0;
9742   for (i = 0x80; i < 0xA0; i++)
9743     iso_code_class[i] = ISO_control_1;
9744   for (i = 0xA1; i < 0xFF; i++)
9745     iso_code_class[i] = ISO_graphic_plane_1;
9746   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9747   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9748   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9749   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9750   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9751   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9752   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9753   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9754   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9755
9756   for (i = 0; i < 256; i++)
9757     {
9758       emacs_mule_bytes[i] = 1;
9759     }
9760   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9761   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9762   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9763   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9764 }
9765
9766 #ifdef emacs
9767
/* Set up the Lisp-visible side of this file: create the coding system
   hash table, define the symbols, primitives and variables declared
   here, and define the two built-in coding systems `no-conversion'
   and `undecided'.  */
void
syms_of_coding ()
{
  /* The coding system hash table compares its keys with `eq'.  */
  staticpro (&Vcoding_system_hash_table);
  {
    Lisp_Object args[2];
    args[0] = QCtest;
    args[1] = Qeq;
    Vcoding_system_hash_table = Fmake_hash_table (2, args);
  }

  staticpro (&Vsjis_coding_system);
  Vsjis_coding_system = Qnil;

  staticpro (&Vbig5_coding_system);
  Vbig5_coding_system = Qnil;

  staticpro (&Vcode_conversion_reused_workbuf);
  Vcode_conversion_reused_workbuf = Qnil;

  staticpro (&Vcode_conversion_workbuf_name);
  Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");

  reused_workbuf_in_use = 0;

  DEFSYM (Qcharset, "charset");
  DEFSYM (Qtarget_idx, "target-idx");
  DEFSYM (Qcoding_system_history, "coding-system-history");
  Fset (Qcoding_system_history, Qnil);

  /* Each operation symbol below gets a `target-idx' property holding
     the zero-based index of its target argument.  */

  /* Target FILENAME is the first argument.  */
  Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
  /* Target FILENAME is the third argument.  */
  Fput (Qwrite_region, Qtarget_idx, make_number (2));

  DEFSYM (Qcall_process, "call-process");
  /* Target PROGRAM is the first argument.  */
  Fput (Qcall_process, Qtarget_idx, make_number (0));

  DEFSYM (Qcall_process_region, "call-process-region");
  /* Target PROGRAM is the third argument.  */
  Fput (Qcall_process_region, Qtarget_idx, make_number (2));

  DEFSYM (Qstart_process, "start-process");
  /* Target PROGRAM is the third argument.  */
  Fput (Qstart_process, Qtarget_idx, make_number (2));

  DEFSYM (Qopen_network_stream, "open-network-stream");
  /* Target SERVICE is the fourth argument.  */
  Fput (Qopen_network_stream, Qtarget_idx, make_number (3));

  DEFSYM (Qcoding_system, "coding-system");
  DEFSYM (Qcoding_aliases, "coding-aliases");

  DEFSYM (Qeol_type, "eol-type");
  DEFSYM (Qunix, "unix");
  DEFSYM (Qdos, "dos");

  DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
  DEFSYM (Qpost_read_conversion, "post-read-conversion");
  DEFSYM (Qpre_write_conversion, "pre-write-conversion");
  DEFSYM (Qdefault_char, "default-char");
  DEFSYM (Qundecided, "undecided");
  DEFSYM (Qno_conversion, "no-conversion");
  DEFSYM (Qraw_text, "raw-text");

  DEFSYM (Qiso_2022, "iso-2022");

  DEFSYM (Qutf_8, "utf-8");
  DEFSYM (Qutf_8_emacs, "utf-8-emacs");

  DEFSYM (Qutf_16, "utf-16");
  DEFSYM (Qbig, "big");
  DEFSYM (Qlittle, "little");

  DEFSYM (Qshift_jis, "shift-jis");
  DEFSYM (Qbig5, "big5");

  DEFSYM (Qcoding_system_p, "coding-system-p");

  /* `coding-system-error' is an error condition whose parent is
     `error'.  */
  DEFSYM (Qcoding_system_error, "coding-system-error");
  Fput (Qcoding_system_error, Qerror_conditions,
        Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
  Fput (Qcoding_system_error, Qerror_message,
        build_string ("Invalid coding system"));

  /* Intern this now in case it isn't already done.
     Setting this variable twice is harmless.
     But don't staticpro it here--that is done in alloc.c.  */
  Qchar_table_extra_slots = intern ("char-table-extra-slots");

  DEFSYM (Qtranslation_table, "translation-table");
  /* Char tables of subtype `translation-table' have two extra slots.  */
  Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
  DEFSYM (Qtranslation_table_id, "translation-table-id");
  DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
  DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");

  DEFSYM (Qvalid_codes, "valid-codes");

  DEFSYM (Qemacs_mule, "emacs-mule");

  DEFSYM (QCcategory, ":category");
  DEFSYM (QCmnemonic, ":mnemonic");
  DEFSYM (QCdefalut_char, ":default-char");
  DEFSYM (QCdecode_translation_table, ":decode-translation-table");
  DEFSYM (QCencode_translation_table, ":encode-translation-table");
  DEFSYM (QCpost_read_conversion, ":post-read-conversion");
  DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
  DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");

  /* Vcoding_category_table maps each coding category index to the
     symbol naming that category.  */
  Vcoding_category_table
    = Fmake_vector (make_number (coding_category_max), Qnil);
  staticpro (&Vcoding_category_table);
  /* The following are targets of code detection.  */
  ASET (Vcoding_category_table, coding_category_iso_7,
        intern ("coding-category-iso-7"));
  ASET (Vcoding_category_table, coding_category_iso_7_tight,
        intern ("coding-category-iso-7-tight"));
  ASET (Vcoding_category_table, coding_category_iso_8_1,
        intern ("coding-category-iso-8-1"));
  ASET (Vcoding_category_table, coding_category_iso_8_2,
        intern ("coding-category-iso-8-2"));
  ASET (Vcoding_category_table, coding_category_iso_7_else,
        intern ("coding-category-iso-7-else"));
  ASET (Vcoding_category_table, coding_category_iso_8_else,
        intern ("coding-category-iso-8-else"));
  ASET (Vcoding_category_table, coding_category_utf_8_auto,
        intern ("coding-category-utf-8-auto"));
  ASET (Vcoding_category_table, coding_category_utf_8_nosig,
        intern ("coding-category-utf-8"));
  ASET (Vcoding_category_table, coding_category_utf_8_sig,
        intern ("coding-category-utf-8-sig"));
  ASET (Vcoding_category_table, coding_category_utf_16_be,
        intern ("coding-category-utf-16-be"));
  ASET (Vcoding_category_table, coding_category_utf_16_auto,
        intern ("coding-category-utf-16-auto"));
  ASET (Vcoding_category_table, coding_category_utf_16_le,
        intern ("coding-category-utf-16-le"));
  ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
        intern ("coding-category-utf-16-be-nosig"));
  ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
        intern ("coding-category-utf-16-le-nosig"));
  ASET (Vcoding_category_table, coding_category_charset,
        intern ("coding-category-charset"));
  ASET (Vcoding_category_table, coding_category_sjis,
        intern ("coding-category-sjis"));
  ASET (Vcoding_category_table, coding_category_big5,
        intern ("coding-category-big5"));
  ASET (Vcoding_category_table, coding_category_ccl,
        intern ("coding-category-ccl"));
  ASET (Vcoding_category_table, coding_category_emacs_mule,
        intern ("coding-category-emacs-mule"));
  /* The following are NOT targets of code detection.  */
  ASET (Vcoding_category_table, coding_category_raw_text,
        intern ("coding-category-raw-text"));
  ASET (Vcoding_category_table, coding_category_undecided,
        intern ("coding-category-undecided"));

  /* These are the symbols listed in the docstring of
     `last-code-conversion-error' below.  */
  DEFSYM (Qinsufficient_source, "insufficient-source");
  DEFSYM (Qinconsistent_eol, "inconsistent-eol");
  DEFSYM (Qinvalid_source, "invalid-source");
  DEFSYM (Qinterrupted, "interrupted");
  DEFSYM (Qinsufficient_memory, "insufficient-memory");
  DEFSYM (Qcoding_system_define_form, "coding-system-define-form");

  /* Register the primitives defined in this file.  */
  defsubr (&Scoding_system_p);
  defsubr (&Sread_coding_system);
  defsubr (&Sread_non_nil_coding_system);
  defsubr (&Scheck_coding_system);
  defsubr (&Sdetect_coding_region);
  defsubr (&Sdetect_coding_string);
  defsubr (&Sfind_coding_systems_region_internal);
  defsubr (&Sunencodable_char_position);
  defsubr (&Scheck_coding_systems_region);
  defsubr (&Sdecode_coding_region);
  defsubr (&Sencode_coding_region);
  defsubr (&Sdecode_coding_string);
  defsubr (&Sencode_coding_string);
  defsubr (&Sdecode_sjis_char);
  defsubr (&Sencode_sjis_char);
  defsubr (&Sdecode_big5_char);
  defsubr (&Sencode_big5_char);
  defsubr (&Sset_terminal_coding_system_internal);
  defsubr (&Sset_safe_terminal_coding_system_internal);
  defsubr (&Sterminal_coding_system);
  defsubr (&Sset_keyboard_coding_system_internal);
  defsubr (&Skeyboard_coding_system);
  defsubr (&Sfind_operation_coding_system);
  defsubr (&Sset_coding_system_priority);
  defsubr (&Sdefine_coding_system_internal);
  defsubr (&Sdefine_coding_system_alias);
  defsubr (&Scoding_system_put);
  defsubr (&Scoding_system_base);
  defsubr (&Scoding_system_plist);
  defsubr (&Scoding_system_aliases);
  defsubr (&Scoding_system_eol_type);
  defsubr (&Scoding_system_priority_list);

  DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
               doc: /* List of coding systems.

Do not alter the value of this variable manually.  This variable should be
updated by the functions `define-coding-system' and
`define-coding-system-alias'.  */);
  Vcoding_system_list = Qnil;

  /* NOTE(review): the docstring below names `make-coding-system' where
     the docstring of `coding-system-list' names `define-coding-system';
     confirm which function name is current.  */
  DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
               doc: /* Alist of coding system names.
Each element is a one-element list of a coding system name.
This variable is given to `completing-read' as COLLECTION argument.

Do not alter the value of this variable manually.  This variable should be
updated by the functions `make-coding-system' and
`define-coding-system-alias'.  */);
  Vcoding_system_alist = Qnil;

  /* NOTE(review): the docstring below points at `set-coding-priority',
     while the primitive registered above is Sset_coding_system_priority;
     confirm which Lisp-level name the docstring should mention.  */
  DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
               doc: /* List of coding-categories (symbols) ordered by priority.

On detecting a coding system, Emacs tries code detection algorithms
associated with each coding-category one by one in this order.  When
one algorithm agrees with a byte sequence of source text, the coding
system bound to the corresponding coding-category is selected.

Don't modify this variable directly, but use `set-coding-priority'.  */);
  {
    int i;

    /* Cons up the list from the last category to the first, so that
       the result lists the category symbols in table order.  */
    Vcoding_category_list = Qnil;
    for (i = coding_category_max - 1; i >= 0; i--)
      Vcoding_category_list
        = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
                 Vcoding_category_list);
  }

  DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
               doc: /* Specify the coding system for read operations.
It is useful to bind this variable with `let', but do not set it globally.
If the value is a coding system, it is used for decoding on read operation.
If not, an appropriate element is used from one of the coding system alists.
There are three such tables: `file-coding-system-alist',
`process-coding-system-alist', and `network-coding-system-alist'.  */);
  Vcoding_system_for_read = Qnil;

  DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
               doc: /* Specify the coding system for write operations.
Programs bind this variable with `let', but you should not set it globally.
If the value is a coding system, it is used for encoding of output,
when writing it to a file and when sending it to a file or subprocess.

If this does not specify a coding system, an appropriate element
is used from one of the coding system alists.
There are three such tables: `file-coding-system-alist',
`process-coding-system-alist', and `network-coding-system-alist'.
For output to files, if the above procedure does not specify a coding system,
the value of `buffer-file-coding-system' is used.  */);
  Vcoding_system_for_write = Qnil;

  DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
               doc: /*
Coding system used in the latest file or process I/O.  */);
  Vlast_coding_system_used = Qnil;

  DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
               doc: /*
Error status of the last code conversion.

When an error was detected in the last code conversion, this variable
is set to one of the following symbols.
  `insufficient-source'
  `inconsistent-eol'
  `invalid-source'
  `interrupted'
  `insufficient-memory'
When no error was detected, the value doesn't change.  So, to check
the error status of a code conversion by this variable, you must
explicitly set this variable to nil before performing code
conversion.  */);
  Vlast_code_conversion_error = Qnil;

  DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
               doc: /*
*Non-nil means always inhibit code conversion of end-of-line format.
See info node `Coding Systems' and info node `Text and Binary' concerning
such conversion.  */);
  inhibit_eol_conversion = 0;

  DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
               doc: /*
Non-nil means process buffer inherits coding system of process output.
Bind it to t if the process output is to be treated as if it were a file
read from some filesystem.  */);
  inherit_process_coding_system = 0;

  DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
               doc: /*
Alist to decide a coding system to use for a file I/O operation.
The format is ((PATTERN . VAL) ...),
where PATTERN is a regular expression matching a file name,
VAL is a coding system, a cons of coding systems, or a function symbol.
If VAL is a coding system, it is used for both decoding and encoding
the file contents.
If VAL is a cons of coding systems, the car part is used for decoding,
and the cdr part is used for encoding.
If VAL is a function symbol, the function must return a coding system
or a cons of coding systems which are used as above.  The function is
called with an argument that is a list of the arguments with which
`find-operation-coding-system' was called.  If the function can't decide
a coding system, it can return `undecided' so that the normal
code-detection is performed.

See also the function `find-operation-coding-system'
and the variable `auto-coding-alist'.  */);
  Vfile_coding_system_alist = Qnil;

  DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
               doc: /*
Alist to decide a coding system to use for a process I/O operation.
The format is ((PATTERN . VAL) ...),
where PATTERN is a regular expression matching a program name,
VAL is a coding system, a cons of coding systems, or a function symbol.
If VAL is a coding system, it is used for both decoding what is received
from the program and encoding what is sent to the program.
If VAL is a cons of coding systems, the car part is used for decoding,
and the cdr part is used for encoding.
If VAL is a function symbol, the function must return a coding system
or a cons of coding systems which are used as above.

See also the function `find-operation-coding-system'.  */);
  Vprocess_coding_system_alist = Qnil;

  DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
               doc: /*
Alist to decide a coding system to use for a network I/O operation.
The format is ((PATTERN . VAL) ...),
where PATTERN is a regular expression matching a network service name
or is a port number to connect to,
VAL is a coding system, a cons of coding systems, or a function symbol.
If VAL is a coding system, it is used for both decoding what is received
from the network stream and encoding what is sent to the network stream.
If VAL is a cons of coding systems, the car part is used for decoding,
and the cdr part is used for encoding.
If VAL is a function symbol, the function must return a coding system
or a cons of coding systems which are used as above.

See also the function `find-operation-coding-system'.  */);
  Vnetwork_coding_system_alist = Qnil;

  DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
               doc: /* Coding system to use with system messages.
Also used for decoding keyboard input on X Window system.  */);
  Vlocale_coding_system = Qnil;

  /* The eol mnemonics are reset in startup.el system-dependently.  */
  DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
               doc: /*
*String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
  eol_mnemonic_unix = build_string (":");

  DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
               doc: /*
*String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
  eol_mnemonic_dos = build_string ("\\");

  DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
               doc: /*
*String displayed in mode line for MAC-like (CR) end-of-line format.  */);
  eol_mnemonic_mac = build_string ("/");

  DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
               doc: /*
*String displayed in mode line when end-of-line format is not yet determined.  */);
  eol_mnemonic_undecided = build_string (":");

  DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
               doc: /*
*Non-nil enables character translation while encoding and decoding.  */);
  Venable_character_translation = Qt;

  DEFVAR_LISP ("standard-translation-table-for-decode",
               &Vstandard_translation_table_for_decode,
               doc: /* Table for translating characters while decoding.  */);
  Vstandard_translation_table_for_decode = Qnil;

  DEFVAR_LISP ("standard-translation-table-for-encode",
               &Vstandard_translation_table_for_encode,
               doc: /* Table for translating characters while encoding.  */);
  Vstandard_translation_table_for_encode = Qnil;

  DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
               doc: /* Alist of charsets vs revision numbers.
While encoding, if a charset (car part of an element) is found,
designate it with the escape sequence identifying revision (cdr part
of the element).  */);
  Vcharset_revision_table = Qnil;

  DEFVAR_LISP ("default-process-coding-system",
               &Vdefault_process_coding_system,
               doc: /* Cons of coding systems used for process I/O by default.
The car part is used for decoding a process output,
the cdr part is used for encoding a text to be sent to a process.  */);
  Vdefault_process_coding_system = Qnil;

  DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
               doc: /*
Table of extra Latin codes in the range 128..159 (inclusive).
This is a vector of length 256.
If Nth element is non-nil, the existence of code N in a file
\(or output of subprocess) doesn't prevent it from being detected as
a coding system of ISO 2022 variant which has a flag
`accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
or reading output of a subprocess.
Only 128th through 159th elements have a meaning.  */);
  Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);

  DEFVAR_LISP ("select-safe-coding-system-function",
               &Vselect_safe_coding_system_function,
               doc: /*
Function to call to select safe coding system for encoding a text.

If set, this function is called to force a user to select a proper
coding system which can encode the text in the case that a default
coding system used in each operation can't encode the text.  The
function should take care that the buffer is not modified while
the coding system is being selected.

The default value is `select-safe-coding-system' (which see).  */);
  Vselect_safe_coding_system_function = Qnil;

  DEFVAR_BOOL ("coding-system-require-warning",
               &coding_system_require_warning,
               doc: /* Internal use only.
If non-nil, on writing a file, `select-safe-coding-system-function' is
called even if `coding-system-for-write' is non-nil.  The command
`universal-coding-system-argument' binds this variable to t temporarily.  */);
  coding_system_require_warning = 0;


  DEFVAR_BOOL ("inhibit-iso-escape-detection",
               &inhibit_iso_escape_detection,
               doc: /*
If non-nil, Emacs ignores ISO2022's escape sequence on code detection.

By default, on reading a file, Emacs tries to detect how the text is
encoded.  This code detection is sensitive to escape sequences.  If
the sequence is valid as ISO2022, the code is determined as one of
the ISO2022 encodings, and the file is decoded by the corresponding
coding system (e.g. `iso-2022-7bit').

However, there may be a case that you want to read escape sequences in
a file as is.  In such a case, you can set this variable to non-nil.
Then, as the code detection ignores any escape sequences, no file is
detected as encoded in some ISO2022 encoding.  The result is that all
escape sequences become visible in a buffer.

The default value is nil, and it is strongly recommended not to change
it.  That is because many Emacs Lisp source files that contain
non-ASCII characters are encoded by the coding system `iso-2022-7bit'
in Emacs's distribution, and they won't be decoded correctly on
reading if you suppress escape sequence detection.

The other way to read escape sequences in a file without decoding is
to explicitly specify some coding system that doesn't use ISO2022's
escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument].  */);
  inhibit_iso_escape_detection = 0;

  DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
               doc: /* Char table for translating self-inserting characters.
This is applied to the result of input methods, not their input.
See also `keyboard-translate-table'.  */);
    Vtranslation_table_for_input = Qnil;

  /* Define the two built-in coding systems.  ARGS is the argument
     vector for Fdefine_coding_system_internal; PLIST holds the same
     values as keyword/value pairs and becomes the coding system's
     property list.  */
  {
    Lisp_Object args[coding_arg_max];
    Lisp_Object plist[16];
    int i;

    for (i = 0; i < coding_arg_max; i++)
      args[i] = Qnil;

    /* First `no-conversion'.  */
    plist[0] = intern (":name");
    plist[1] = args[coding_arg_name] = Qno_conversion;
    plist[2] = intern (":mnemonic");
    plist[3] = args[coding_arg_mnemonic] = make_number ('=');
    plist[4] = intern (":coding-type");
    plist[5] = args[coding_arg_coding_type] = Qraw_text;
    plist[6] = intern (":ascii-compatible-p");
    plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
    plist[8] = intern (":default-char");
    plist[9] = args[coding_arg_default_char] = make_number (0);
    plist[10] = intern (":for-unibyte");
    plist[11] = args[coding_arg_for_unibyte] = Qt;
    plist[12] = intern (":docstring");
    plist[13] = build_string ("Do no conversion.\n\
\n\
When you visit a file with this coding, the file is read into a\n\
unibyte buffer as is, thus each byte of a file is treated as a\n\
character.");
    plist[14] = intern (":eol-type");
    plist[15] = args[coding_arg_eol_type] = Qunix;
    args[coding_arg_plist] = Flist (16, plist);
    Fdefine_coding_system_internal (coding_arg_max, args);

    /* Then `undecided', reusing both arrays and overwriting only the
       entries that differ.  The :default-char pair in PLIST is replaced
       by a :charset-list pair, while args[coding_arg_default_char]
       keeps the value stored above.  */
    plist[1] = args[coding_arg_name] = Qundecided;
    plist[3] = args[coding_arg_mnemonic] = make_number ('-');
    plist[5] = args[coding_arg_coding_type] = Qundecided;
    /* This is already set.
       plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
    plist[8] = intern (":charset-list");
    plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
    plist[11] = args[coding_arg_for_unibyte] = Qnil;
    plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
    plist[15] = args[coding_arg_eol_type] = Qnil;
    args[coding_arg_plist] = Flist (16, plist);
    Fdefine_coding_system_internal (coding_arg_max, args);
  }

  /* This must come after `no-conversion' has been defined above.  */
  setup_coding_system (Qno_conversion, &safe_terminal_coding);

  {
    int i;

    /* Give every coding category symbol the initial value
       `no-conversion'.  */
    for (i = 0; i < coding_category_max; i++)
      Fset (AREF (Vcoding_category_table, i), Qno_conversion);
  }
  /* The system's end-of-line type is `dos' on MSDOS and WINDOWSNT
     builds, and `unix' everywhere else.  */
#if defined (MSDOS) || defined (WINDOWSNT)
  system_eol_type = Qdos;
#else
  system_eol_type = Qunix;
#endif
  staticpro (&system_eol_type);
}
10300
10301 char *
10302 emacs_strerror (error_number)
10303      int error_number;
10304 {
10305   char *str;
10306
10307   synchronize_system_messages_locale ();
10308   str = strerror (error_number);
10309
10310   if (! NILP (Vlocale_coding_system))
10311     {
10312       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10313                                                       Vlocale_coding_system,
10314                                                       0);
10315       str = (char *) SDATA (dec);
10316     }
10317
10318   return str;
10319 }
10320
10321 #endif /* emacs */
10322
10323 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10324    (do not change this comment) */