src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcharset_hash_table.  The conversion from
  62   coding system symbol to attributes vector is done by looking up
  63   Vcharset_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we setup a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (coding, detect_info)
 158      struct coding_system *coding;
 159      struct coding_detection_info *detect_info;
 160 {
 161   const unsigned char *src = coding->source;
 162   const unsigned char *src_end = coding->source + coding->src_bytes;
 163   int multibytep = coding->src_multibyte;
 164   int consumed_chars = 0;
 165   int found = 0;
 166   ...;
 167
 168   while (1)
 169     {
 170       /* Get one byte from the source.  If the souce is exausted, jump
 171          to no_more_source:.  */
 172       ONE_MORE_BYTE (c);
 173
 174       if (! __C_conforms_to_XXX___ (c))
 175         break;
 176       if (! __C_strongly_suggests_XXX__ (c))
 177         found = CATEGORY_MASK_XXX;
 178     }
 179   /* The byte sequence is invalid for XXX.  */
 180   detect_info->rejected |= CATEGORY_MASK_XXX;
 181   return 0;
 182
 183  no_more_source:
 184   /* The source exausted successfully.  */
 185   detect_info->found |= found;
 186   return 1;
 187 }
 188 #endif
 189
 190 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 191
 192   These functions decode a byte sequence specified as a source by
 193   CODING.  The resulting multibyte text goes to a place pointed to by
 194   CODING->charbuf, the length of which should not exceed
 195   CODING->charbuf_size;
 196
 197   These functions set the information of original and decoded texts in
 198   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 199   They also set CODING->result to one of CODING_RESULT_XXX indicating
 200   how the decoding is finished.
 201
 202   Below is the template of these functions.  */
 203
 204 #if 0
 205 static void
 206 decode_coding_XXXX (coding)
 207      struct coding_system *coding;
 208 {
 209   const unsigned char *src = coding->source + coding->consumed;
 210   const unsigned char *src_end = coding->source + coding->src_bytes;
 211   /* SRC_BASE remembers the start position in source in each loop.
 212      The loop will be exited when there's not enough source code, or
 213      when there's no room in CHARBUF for a decoded character.  */
 214   const unsigned char *src_base;
 215   /* A buffer to produce decoded characters.  */
 216   int *charbuf = coding->charbuf + coding->charbuf_used;
 217   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 218   int multibytep = coding->src_multibyte;
 219
 220   while (1)
 221     {
 222       src_base = src;
 223       if (charbuf < charbuf_end)
 224         /* No more room to produce a decoded character.  */
 225         break;
 226       ONE_MORE_BYTE (c);
 227       /* Decode it. */
 228     }
 229
 230  no_more_source:
 231   if (src_base < src_end
 232       && coding->mode & CODING_MODE_LAST_BLOCK)
 233     /* If the source ends by partial bytes to construct a character,
 234        treat them as eight-bit raw data.  */
 235     while (src_base < src_end && charbuf < charbuf_end)
 236       *charbuf++ = *src_base++;
 237   /* Remember how many bytes and characters we consumed.  If the
 238      source is multibyte, the bytes and chars are not identical.  */
 239   coding->consumed = coding->consumed_char = src_base - coding->source;
 240   /* Remember how many characters we produced.  */
 241   coding->charbuf_used = charbuf - coding->charbuf;
 242 }
 243 #endif
 244
 245 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 246
 247   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 248   internal multibyte format by CODING.  The resulting byte sequence
 249   goes to a place pointed to by DESTINATION, the length of which
 250   should not exceed DST_BYTES.
 251
 252   These functions set the information of original and encoded texts in
 253   the members produced, produced_char, consumed, and consumed_char of
 254   the structure *CODING.  They also set the member result to one of
 255   CODING_RESULT_XXX indicating how the encoding finished.
 256
 257   DST_BYTES zero means that source area and destination area are
 258   overlapped, which means that we can produce a encoded text until it
 259   reaches at the head of not-yet-encoded source text.
 260
 261   Below is a template of these functions.  */
 262 #if 0
 263 static void
 264 encode_coding_XXX (coding)
 265      struct coding_system *coding;
 266 {
 267   int multibytep = coding->dst_multibyte;
 268   int *charbuf = coding->charbuf;
 269   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 270   unsigned char *dst = coding->destination + coding->produced;
 271   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 272   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 273   int produced_chars = 0;
 274
 275   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 276     {
 277       int c = *charbuf;
 278       /* Encode C into DST, and increment DST.  */
 279     }
 280  label_no_more_destination:
 281   /* How many chars and bytes we produced.  */
 282   coding->produced_char += produced_chars;
 283   coding->produced = dst - coding->destination;
 284 }
 285 #endif
 286
 287 \f
 288 /*** 1. Preamble ***/
 289
 290 #include <config.h>
 291 #include <stdio.h>
 292
 293 #include "lisp.h"
 294 #include "buffer.h"
 295 #include "character.h"
 296 #include "charset.h"
 297 #include "ccl.h"
 298 #include "composite.h"
 299 #include "coding.h"
 300 #include "window.h"
 301 #include "frame.h"
 302 #include "termhooks.h"
 303
 304 Lisp_Object Vcoding_system_hash_table;
 305
 306 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 307 Lisp_Object Qunix, Qdos;
 308 extern Lisp_Object Qmac;        /* frame.c */
 309 Lisp_Object Qbuffer_file_coding_system;
 310 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 311 Lisp_Object Qdefault_char;
 312 Lisp_Object Qno_conversion, Qundecided;
 313 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 314 Lisp_Object Qbig, Qlittle;
 315 Lisp_Object Qcoding_system_history;
 316 Lisp_Object Qvalid_codes;
 317 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 318 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 319 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 320 Lisp_Object QCascii_compatible_p;
 321
 322 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 323 Lisp_Object Qcall_process, Qcall_process_region;
 324 Lisp_Object Qstart_process, Qopen_network_stream;
 325 Lisp_Object Qtarget_idx;
 326
 327 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 328 Lisp_Object Qinterrupted, Qinsufficient_memory;
 329
 330 extern Lisp_Object Qcompletion_ignore_case;
 331
 332 /* If a symbol has this property, evaluate the value to define the
 333    symbol as a coding system.  */
 334 static Lisp_Object Qcoding_system_define_form;
 335
 336 int coding_system_require_warning;
 337
 338 Lisp_Object Vselect_safe_coding_system_function;
 339
 340 /* Mnemonic string for each format of end-of-line.  */
 341 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 342 /* Mnemonic string to indicate format of end-of-line is not yet
 343    decided.  */
 344 Lisp_Object eol_mnemonic_undecided;
 345
 346 /* Format of end-of-line decided by system.  This is Qunix on
 347    Unix and Mac, Qdos on DOS/Windows.
 348    This has an effect only for external encoding (i.e. for output to
 349    file and process), not for in-buffer or Lisp string encoding.  */
 350 static Lisp_Object system_eol_type;
 351
 352 #ifdef emacs
 353
 354 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 355
 356 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 357
 358 /* Coding system emacs-mule and raw-text are for converting only
 359    end-of-line format.  */
 360 Lisp_Object Qemacs_mule, Qraw_text;
 361 Lisp_Object Qutf_8_emacs;
 362
 363 /* Coding-systems are handed between Emacs Lisp programs and C internal
 364    routines by the following three variables.  */
 365 /* Coding-system for reading files and receiving data from process.  */
 366 Lisp_Object Vcoding_system_for_read;
 367 /* Coding-system for writing files and sending data to process.  */
 368 Lisp_Object Vcoding_system_for_write;
 369 /* Coding-system actually used in the latest I/O.  */
 370 Lisp_Object Vlast_coding_system_used;
 371 /* Set to non-nil when an error is detected while code conversion.  */
 372 Lisp_Object Vlast_code_conversion_error;
 373 /* A vector of length 256 which contains information about special
 374    Latin codes (especially for dealing with Microsoft codes).  */
 375 Lisp_Object Vlatin_extra_code_table;
 376
 377 /* Flag to inhibit code conversion of end-of-line format.  */
 378 int inhibit_eol_conversion;
 379
 380 /* Flag to inhibit ISO2022 escape sequence detection.  */
 381 int inhibit_iso_escape_detection;
 382
 383 /* Flag to inhibit detection of binary files through null bytes.  */
 384 int inhibit_null_byte_detection;
 385
 386 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 387 int inherit_process_coding_system;
 388
 389 /* Coding system to be used to encode text for terminal display when
 390    terminal coding system is nil.  */
 391 struct coding_system safe_terminal_coding;
 392
 393 Lisp_Object Vfile_coding_system_alist;
 394 Lisp_Object Vprocess_coding_system_alist;
 395 Lisp_Object Vnetwork_coding_system_alist;
 396
 397 Lisp_Object Vlocale_coding_system;
 398
 399 #endif /* emacs */
 400
 401 /* Flag to tell if we look up translation table on character code
 402    conversion.  */
 403 Lisp_Object Venable_character_translation;
 404 /* Standard translation table to look up on decoding (reading).  */
 405 Lisp_Object Vstandard_translation_table_for_decode;
 406 /* Standard translation table to look up on encoding (writing).  */
 407 Lisp_Object Vstandard_translation_table_for_encode;
 408
 409 Lisp_Object Qtranslation_table;
 410 Lisp_Object Qtranslation_table_id;
 411 Lisp_Object Qtranslation_table_for_decode;
 412 Lisp_Object Qtranslation_table_for_encode;
 413
 414 /* Alist of charsets vs revision number.  */
 415 static Lisp_Object Vcharset_revision_table;
 416
 417 /* Default coding systems used for process I/O.  */
 418 Lisp_Object Vdefault_process_coding_system;
 419
 420 /* Char table for translating Quail and self-inserting input.  */
 421 Lisp_Object Vtranslation_table_for_input;
 422
 423 /* Two special coding systems.  */
 424 Lisp_Object Vsjis_coding_system;
 425 Lisp_Object Vbig5_coding_system;
 426
 427 /* ISO2022 section */
 428
 429 #define CODING_ISO_INITIAL(coding, reg)                 \
 430   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 431                      coding_attr_iso_initial),          \
 432                reg)))
 433
 434
 435 #define CODING_ISO_REQUEST(coding, charset_id)  \
 436   ((charset_id <= (coding)->max_charset_id      \
 437     ? (coding)->safe_charsets[charset_id]       \
 438     : -1))
 439
 440
 441 #define CODING_ISO_FLAGS(coding)        \
 442   ((coding)->spec.iso_2022.flags)
 443 #define CODING_ISO_DESIGNATION(coding, reg)     \
 444   ((coding)->spec.iso_2022.current_designation[reg])
 445 #define CODING_ISO_INVOCATION(coding, plane)    \
 446   ((coding)->spec.iso_2022.current_invocation[plane])
 447 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 448   ((coding)->spec.iso_2022.single_shifting)
 449 #define CODING_ISO_BOL(coding)  \
 450   ((coding)->spec.iso_2022.bol)
 451 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 452   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 453
 454 /* Control characters of ISO2022.  */
 455                         /* code */      /* function */
 456 #define ISO_CODE_LF     0x0A            /* line-feed */
 457 #define ISO_CODE_CR     0x0D            /* carriage-return */
 458 #define ISO_CODE_SO     0x0E            /* shift-out */
 459 #define ISO_CODE_SI     0x0F            /* shift-in */
 460 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 461 #define ISO_CODE_ESC    0x1B            /* escape */
 462 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 463 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 464 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 465
 466 /* All code (1-byte) of ISO2022 is classified into one of the
 467    followings.  */
 468 enum iso_code_class_type
 469   {
 470     ISO_control_0,              /* Control codes in the range
 471                                    0x00..0x1F and 0x7F, except for the
 472                                    following 5 codes.  */
 473     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 474     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 475     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 476     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 477     ISO_control_1,              /* Control codes in the range
 478                                    0x80..0x9F, except for the
 479                                    following 3 codes.  */
 480     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 481     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 482     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 483     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 484     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 485     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 486     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 487   };
 488
 489 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 490     `iso-flags' attribute of an iso2022 coding system.  */
 491
 492 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 493    instead of the correct short-form sequence (e.g. ESC $ A).  */
 494 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 495
 496 /* If set, reset graphic planes and registers at end-of-line to the
 497    initial state.  */
 498 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 499
 500 /* If set, reset graphic planes and registers before any control
 501    characters to the initial state.  */
 502 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 503
 504 /* If set, encode by 7-bit environment.  */
 505 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 506
 507 /* If set, use locking-shift function.  */
 508 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 509
 510 /* If set, use single-shift function.  Overwrite
 511    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 512 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 513
 514 /* If set, use designation escape sequence.  */
 515 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 516
 517 /* If set, produce revision number sequence.  */
 518 #define CODING_ISO_FLAG_REVISION        0x0080
 519
 520 /* If set, produce ISO6429's direction specifying sequence.  */
 521 #define CODING_ISO_FLAG_DIRECTION       0x0100
 522
 523 /* If set, assume designation states are reset at beginning of line on
 524    output.  */
 525 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 526
 527 /* If set, designation sequence should be placed at beginning of line
 528    on output.  */
 529 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 530
 531 /* If set, do not encode unsafe charactes on output.  */
 532 #define CODING_ISO_FLAG_SAFE            0x0800
 533
 534 /* If set, extra latin codes (128..159) are accepted as a valid code
 535    on input.  */
 536 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 537
 538 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 539
 540 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 541
 542 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 543
 544 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 545
 546 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 547
 548 /* A character to be produced on output if encoding of the original
 549    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 550 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 551
 552 /* UTF-8 section */
 553 #define CODING_UTF_8_BOM(coding)        \
 554   ((coding)->spec.utf_8_bom)
 555
 556 /* UTF-16 section */
 557 #define CODING_UTF_16_BOM(coding)       \
 558   ((coding)->spec.utf_16.bom)
 559
 560 #define CODING_UTF_16_ENDIAN(coding)    \
 561   ((coding)->spec.utf_16.endian)
 562
 563 #define CODING_UTF_16_SURROGATE(coding) \
 564   ((coding)->spec.utf_16.surrogate)
 565
 566
 567 /* CCL section */
 568 #define CODING_CCL_DECODER(coding)      \
 569   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 570 #define CODING_CCL_ENCODER(coding)      \
 571   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 572 #define CODING_CCL_VALIDS(coding)                                          \
 573   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 574
 575 /* Index for each coding category in `coding_categories' */
 576
 577 enum coding_category
 578   {
 579     coding_category_iso_7,
 580     coding_category_iso_7_tight,
 581     coding_category_iso_8_1,
 582     coding_category_iso_8_2,
 583     coding_category_iso_7_else,
 584     coding_category_iso_8_else,
 585     coding_category_utf_8_auto,
 586     coding_category_utf_8_nosig,
 587     coding_category_utf_8_sig,
 588     coding_category_utf_16_auto,
 589     coding_category_utf_16_be,
 590     coding_category_utf_16_le,
 591     coding_category_utf_16_be_nosig,
 592     coding_category_utf_16_le_nosig,
 593     coding_category_charset,
 594     coding_category_sjis,
 595     coding_category_big5,
 596     coding_category_ccl,
 597     coding_category_emacs_mule,
 598     /* All above are targets of code detection.  */
 599     coding_category_raw_text,
 600     coding_category_undecided,
 601     coding_category_max
 602   };
 603
 604 /* Definitions of flag bits used in detect_coding_XXXX.  */
 605 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 606 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 607 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 608 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 609 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 610 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 611 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 612 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 613 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 614 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 615 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 616 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 617 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 618 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 619 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 620 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 621 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 622 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 623 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 624 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 625
 626 /* This value is returned if detect_coding_mask () find nothing other
 627    than ASCII characters.  */
 628 #define CATEGORY_MASK_ANY               \
 629   (CATEGORY_MASK_ISO_7                  \
 630    | CATEGORY_MASK_ISO_7_TIGHT          \
 631    | CATEGORY_MASK_ISO_8_1              \
 632    | CATEGORY_MASK_ISO_8_2              \
 633    | CATEGORY_MASK_ISO_7_ELSE           \
 634    | CATEGORY_MASK_ISO_8_ELSE           \
 635    | CATEGORY_MASK_UTF_8_AUTO           \
 636    | CATEGORY_MASK_UTF_8_NOSIG          \
 637    | CATEGORY_MASK_UTF_8_SIG            \
 638    | CATEGORY_MASK_UTF_16_AUTO          \
 639    | CATEGORY_MASK_UTF_16_BE            \
 640    | CATEGORY_MASK_UTF_16_LE            \
 641    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 642    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 643    | CATEGORY_MASK_CHARSET              \
 644    | CATEGORY_MASK_SJIS                 \
 645    | CATEGORY_MASK_BIG5                 \
 646    | CATEGORY_MASK_CCL                  \
 647    | CATEGORY_MASK_EMACS_MULE)
 648
 649
 650 #define CATEGORY_MASK_ISO_7BIT \
 651   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 652
 653 #define CATEGORY_MASK_ISO_8BIT \
 654   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 655
 656 #define CATEGORY_MASK_ISO_ELSE \
 657   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 658
 659 #define CATEGORY_MASK_ISO_ESCAPE        \
 660   (CATEGORY_MASK_ISO_7                  \
 661    | CATEGORY_MASK_ISO_7_TIGHT          \
 662    | CATEGORY_MASK_ISO_7_ELSE           \
 663    | CATEGORY_MASK_ISO_8_ELSE)
 664
 665 #define CATEGORY_MASK_ISO       \
 666   (  CATEGORY_MASK_ISO_7BIT     \
 667      | CATEGORY_MASK_ISO_8BIT   \
 668      | CATEGORY_MASK_ISO_ELSE)
 669
 670 #define CATEGORY_MASK_UTF_16            \
 671   (CATEGORY_MASK_UTF_16_AUTO            \
 672    | CATEGORY_MASK_UTF_16_BE            \
 673    | CATEGORY_MASK_UTF_16_LE            \
 674    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 675    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 676
 677 #define CATEGORY_MASK_UTF_8     \
 678   (CATEGORY_MASK_UTF_8_AUTO     \
 679    | CATEGORY_MASK_UTF_8_NOSIG  \
 680    | CATEGORY_MASK_UTF_8_SIG)
 681
 682 /* List of symbols `coding-category-xxx' ordered by priority.  This
 683    variable is exposed to Emacs Lisp.  */
 684 static Lisp_Object Vcoding_category_list;
 685
 686 /* Table of coding categories (Lisp symbols).  This variable is for
 687    internal use oly.  */
 688 static Lisp_Object Vcoding_category_table;
 689
 690 /* Table of coding-categories ordered by priority.  */
 691 static enum coding_category coding_priorities[coding_category_max];
 692
 693 /* Nth element is a coding context for the coding system bound to the
 694    Nth coding category.  */
 695 static struct coding_system coding_categories[coding_category_max];
 696
 697 /*** Commonly used macros and functions ***/
 698
 699 #ifndef min
 700 #define min(a, b) ((a) < (b) ? (a) : (b))
 701 #endif
 702 #ifndef max
 703 #define max(a, b) ((a) > (b) ? (a) : (b))
 704 #endif
 705
 706 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 707   do {                                                  \
 708     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 709     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 710   } while (0)
 711
 712
 713 /* Safely get one byte from the source text pointed by SRC which ends
 714    at SRC_END, and set C to that byte.  If there are not enough bytes
 715    in the source, it jumps to `no_more_source'.  If multibytep is
 716    nonzero, and a multibyte character is found at SRC, set C to the
 717    negative value of the character code.  The caller should declare
 718    and set these variables appropriately in advance:
 719         src, src_end, multibytep */
 720
 721 #define ONE_MORE_BYTE(c)                                \
 722   do {                                                  \
 723     if (src == src_end)                                 \
 724       {                                                 \
 725         if (src_base < src)                             \
 726           record_conversion_result                      \
 727             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 728         goto no_more_source;                            \
 729       }                                                 \
 730     c = *src++;                                         \
 731     if (multibytep && (c & 0x80))                       \
 732       {                                                 \
 733         if ((c & 0xFE) == 0xC0)                         \
 734           c = ((c & 1) << 6) | *src++;                  \
 735         else                                            \
 736           {                                             \
 737             src--;                                      \
 738             c = - string_char (src, &src, NULL);        \
 739             record_conversion_result                    \
 740               (coding, CODING_RESULT_INVALID_SRC);      \
 741           }                                             \
 742       }                                                 \
 743     consumed_chars++;                                   \
 744   } while (0)
 745
 746
 747 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 748   do {                                                  \
 749     c = *src++;                                         \
 750     if (multibytep && (c & 0x80))                       \
 751       {                                                 \
 752         if ((c & 0xFE) == 0xC0)                         \
 753           c = ((c & 1) << 6) | *src++;                  \
 754         else                                            \
 755           {                                             \
 756             src--;                                      \
 757             c = - string_char (src, &src, NULL);        \
 758             record_conversion_result                    \
 759               (coding, CODING_RESULT_INVALID_SRC);      \
 760           }                                             \
 761       }                                                 \
 762     consumed_chars++;                                   \
 763   } while (0)
 764
 765
 766 /* Store a byte C in the place pointed by DST and increment DST to the
 767    next free point, and increment PRODUCED_CHARS.  The caller should
 768    assure that C is 0..127, and declare and set the variable `dst'
 769    appropriately in advance.
 770 */
 771
 772
 773 #define EMIT_ONE_ASCII_BYTE(c)  \
 774   do {                          \
 775     produced_chars++;           \
 776     *dst++ = (c);               \
 777   } while (0)
 778
 779
 780 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 781
 782 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 783   do {                                  \
 784     produced_chars += 2;                \
 785     *dst++ = (c1), *dst++ = (c2);       \
 786   } while (0)
 787
 788
 789 /* Store a byte C in the place pointed by DST and increment DST to the
 790    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 791    nonzero, store in an appropriate multibyte from.  The caller should
 792    declare and set the variables `dst' and `multibytep' appropriately
 793    in advance.  */
 794
 795 #define EMIT_ONE_BYTE(c)                \
 796   do {                                  \
 797     produced_chars++;                   \
 798     if (multibytep)                     \
 799       {                                 \
 800         int ch = (c);                   \
 801         if (ch >= 0x80)                 \
 802           ch = BYTE8_TO_CHAR (ch);      \
 803         CHAR_STRING_ADVANCE (ch, dst);  \
 804       }                                 \
 805     else                                \
 806       *dst++ = (c);                     \
 807   } while (0)
 808
 809
 810 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 811
 812 #define EMIT_TWO_BYTES(c1, c2)          \
 813   do {                                  \
 814     produced_chars += 2;                \
 815     if (multibytep)                     \
 816       {                                 \
 817         int ch;                         \
 818                                         \
 819         ch = (c1);                      \
 820         if (ch >= 0x80)                 \
 821           ch = BYTE8_TO_CHAR (ch);      \
 822         CHAR_STRING_ADVANCE (ch, dst);  \
 823         ch = (c2);                      \
 824         if (ch >= 0x80)                 \
 825           ch = BYTE8_TO_CHAR (ch);      \
 826         CHAR_STRING_ADVANCE (ch, dst);  \
 827       }                                 \
 828     else                                \
 829       {                                 \
 830         *dst++ = (c1);                  \
 831         *dst++ = (c2);                  \
 832       }                                 \
 833   } while (0)
 834
 835
 836 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 837   do {                                  \
 838     EMIT_ONE_BYTE (c1);                 \
 839     EMIT_TWO_BYTES (c2, c3);            \
 840   } while (0)
 841
 842
 843 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 844   do {                                          \
 845     EMIT_TWO_BYTES (c1, c2);                    \
 846     EMIT_TWO_BYTES (c3, c4);                    \
 847   } while (0)
 848
 849
 850 /* Prototypes for static functions.  */
 851 static void record_conversion_result P_ ((struct coding_system *coding,
 852                                           enum coding_result_code result));
 853 static int detect_coding_utf_8 P_ ((struct coding_system *,
 854                                     struct coding_detection_info *info));
 855 static void decode_coding_utf_8 P_ ((struct coding_system *));
 856 static int encode_coding_utf_8 P_ ((struct coding_system *));
 857
 858 static int detect_coding_utf_16 P_ ((struct coding_system *,
 859                                      struct coding_detection_info *info));
 860 static void decode_coding_utf_16 P_ ((struct coding_system *));
 861 static int encode_coding_utf_16 P_ ((struct coding_system *));
 862
 863 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 864                                        struct coding_detection_info *info));
 865 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 866 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 867
 868 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 869                                          struct coding_detection_info *info));
 870 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 871 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 872
 873 static int detect_coding_sjis P_ ((struct coding_system *,
 874                                    struct coding_detection_info *info));
 875 static void decode_coding_sjis P_ ((struct coding_system *));
 876 static int encode_coding_sjis P_ ((struct coding_system *));
 877
 878 static int detect_coding_big5 P_ ((struct coding_system *,
 879                                    struct coding_detection_info *info));
 880 static void decode_coding_big5 P_ ((struct coding_system *));
 881 static int encode_coding_big5 P_ ((struct coding_system *));
 882
 883 static int detect_coding_ccl P_ ((struct coding_system *,
 884                                   struct coding_detection_info *info));
 885 static void decode_coding_ccl P_ ((struct coding_system *));
 886 static int encode_coding_ccl P_ ((struct coding_system *));
 887
 888 static void decode_coding_raw_text P_ ((struct coding_system *));
 889 static int encode_coding_raw_text P_ ((struct coding_system *));
 890
 891 static void coding_set_source P_ ((struct coding_system *));
 892 static void coding_set_destination P_ ((struct coding_system *));
 893 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 894 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 895                                             EMACS_INT, EMACS_INT));
 896 static unsigned char *alloc_destination P_ ((struct coding_system *,
 897                                              EMACS_INT, unsigned char *));
 898 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 899 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 900                                                      int *, int *,
 901                                                      unsigned char *));
 902 static int detect_eol P_ ((const unsigned char *,
 903                            EMACS_INT, enum coding_category));
 904 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 905 static void decode_eol P_ ((struct coding_system *));
 906 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 907 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
 908                                         int, int *, int *));
 909 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 910 static INLINE void produce_composition P_ ((struct coding_system *, int *,
 911                                             EMACS_INT));
 912 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 913                                         EMACS_INT));
 914 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 915 static int decode_coding P_ ((struct coding_system *));
 916 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 917                                                       struct coding_system *,
 918                                                       int *, EMACS_INT *));
 919 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 920                                                   struct coding_system *,
 921                                                   int *, EMACS_INT *));
 922 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 923 static int encode_coding P_ ((struct coding_system *));
 924 static Lisp_Object make_conversion_work_buffer P_ ((int));
 925 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 926 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 927 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 928
 929 static void
 930 record_conversion_result (struct coding_system *coding,
 931                           enum coding_result_code result)
 932 {
 933   coding->result = result;
 934   switch (result)
 935     {
 936     case CODING_RESULT_INSUFFICIENT_SRC:
 937       Vlast_code_conversion_error = Qinsufficient_source;
 938       break;
 939     case CODING_RESULT_INCONSISTENT_EOL:
 940       Vlast_code_conversion_error = Qinconsistent_eol;
 941       break;
 942     case CODING_RESULT_INVALID_SRC:
 943       Vlast_code_conversion_error = Qinvalid_source;
 944       break;
 945     case CODING_RESULT_INTERRUPT:
 946       Vlast_code_conversion_error = Qinterrupted;
 947       break;
 948     case CODING_RESULT_INSUFFICIENT_MEM:
 949       Vlast_code_conversion_error = Qinsufficient_memory;
 950       break;
 951     default:
 952       Vlast_code_conversion_error = intern ("Unknown error");
 953     }
 954 }
 955
 956 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 957   do {                                                                       \
 958     charset_map_loaded = 0;                                                  \
 959     c = DECODE_CHAR (charset, code);                                         \
 960     if (charset_map_loaded)                                                  \
 961       {                                                                      \
 962         const unsigned char *orig = coding->source;                          \
 963         EMACS_INT offset;                                                    \
 964                                                                              \
 965         coding_set_source (coding);                                          \
 966         offset = coding->source - orig;                                      \
 967         src += offset;                                                       \
 968         src_base += offset;                                                  \
 969         src_end += offset;                                                   \
 970       }                                                                      \
 971   } while (0)
 972
 973
 974 /* If there are at least BYTES length of room at dst, allocate memory
 975    for coding->destination and update dst and dst_end.  We don't have
 976    to take care of coding->source which will be relocated.  It is
 977    handled by calling coding_set_source in encode_coding.  */
 978
 979 #define ASSURE_DESTINATION(bytes)                               \
 980   do {                                                          \
 981     if (dst + (bytes) >= dst_end)                               \
 982       {                                                         \
 983         int more_bytes = charbuf_end - charbuf + (bytes);       \
 984                                                                 \
 985         dst = alloc_destination (coding, more_bytes, dst);      \
 986         dst_end = coding->destination + coding->dst_bytes;      \
 987       }                                                         \
 988   } while (0)
 989
 990
 991 /* Store multibyte form of the character C in P, and advance P to the
 992    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
 993    never calls MAYBE_UNIFY_CHAR.  */
 994
 995 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
 996   do {                                          \
 997     if ((c) <= MAX_1_BYTE_CHAR)                 \
 998       *(p)++ = (c);                             \
 999     else if ((c) <= MAX_2_BYTE_CHAR)            \
1000       *(p)++ = (0xC0 | ((c) >> 6)),             \
1001         *(p)++ = (0x80 | ((c) & 0x3F));         \
1002     else if ((c) <= MAX_3_BYTE_CHAR)            \
1003       *(p)++ = (0xE0 | ((c) >> 12)),            \
1004         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
1005         *(p)++ = (0x80 | ((c) & 0x3F));         \
1006     else if ((c) <= MAX_4_BYTE_CHAR)            \
1007       *(p)++ = (0xF0 | (c >> 18)),              \
1008         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1009         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1010         *(p)++ = (0x80 | (c & 0x3F));           \
1011     else if ((c) <= MAX_5_BYTE_CHAR)            \
1012       *(p)++ = 0xF8,                            \
1013         *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
1014         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
1015         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1016         *(p)++ = (0x80 | (c & 0x3F));           \
1017     else                                        \
1018       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1019   } while (0)
1020
1021
1022 /* Return the character code of character whose multibyte form is at
1023    P, and advance P to the end of the multibyte form.  This is like
1024    STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.  */
1025
1026 #define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
1027   (!((p)[0] & 0x80)                                             \
1028    ? *(p)++                                                     \
1029    : ! ((p)[0] & 0x20)                                          \
1030    ? ((p) += 2,                                                 \
1031       ((((p)[-2] & 0x1F) << 6)                                  \
1032        | ((p)[-1] & 0x3F)                                       \
1033        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
1034    : ! ((p)[0] & 0x10)                                          \
1035    ? ((p) += 3,                                                 \
1036       ((((p)[-3] & 0x0F) << 12)                                 \
1037        | (((p)[-2] & 0x3F) << 6)                                \
1038        | ((p)[-1] & 0x3F)))                                     \
1039    : ! ((p)[0] & 0x08)                                          \
1040    ? ((p) += 4,                                                 \
1041       ((((p)[-4] & 0xF) << 18)                                  \
1042        | (((p)[-3] & 0x3F) << 12)                               \
1043        | (((p)[-2] & 0x3F) << 6)                                \
1044        | ((p)[-1] & 0x3F)))                                     \
1045    : ((p) += 5,                                                 \
1046       ((((p)[-4] & 0x3F) << 18)                                 \
1047        | (((p)[-3] & 0x3F) << 12)                               \
1048        | (((p)[-2] & 0x3F) << 6)                                \
1049        | ((p)[-1] & 0x3F))))
1050
1051
1052 static void
1053 coding_set_source (coding)
1054      struct coding_system *coding;
1055 {
1056   if (BUFFERP (coding->src_object))
1057     {
1058       struct buffer *buf = XBUFFER (coding->src_object);
1059
1060       if (coding->src_pos < 0)
1061         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1062       else
1063         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1064     }
1065   else if (STRINGP (coding->src_object))
1066     {
1067       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1068     }
1069   else
1070     /* Otherwise, the source is C string and is never relocated
1071        automatically.  Thus we don't have to update anything.  */
1072     ;
1073 }
1074
1075 static void
1076 coding_set_destination (coding)
1077      struct coding_system *coding;
1078 {
1079   if (BUFFERP (coding->dst_object))
1080     {
1081       if (coding->src_pos < 0)
1082         {
1083           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1084           coding->dst_bytes = (GAP_END_ADDR
1085                                - (coding->src_bytes - coding->consumed)
1086                                - coding->destination);
1087         }
1088       else
1089         {
1090           /* We are sure that coding->dst_pos_byte is before the gap
1091              of the buffer. */
1092           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1093                                  + coding->dst_pos_byte - BEG_BYTE);
1094           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1095                                - coding->destination);
1096         }
1097     }
1098   else
1099     /* Otherwise, the destination is C string and is never relocated
1100        automatically.  Thus we don't have to update anything.  */
1101     ;
1102 }
1103
1104
1105 static void
1106 coding_alloc_by_realloc (coding, bytes)
1107      struct coding_system *coding;
1108      EMACS_INT bytes;
1109 {
1110   coding->destination = (unsigned char *) xrealloc (coding->destination,
1111                                                     coding->dst_bytes + bytes);
1112   coding->dst_bytes += bytes;
1113 }
1114
1115 static void
1116 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1117      struct coding_system *coding;
1118      EMACS_INT gap_head_used, bytes;
1119 {
1120   if (EQ (coding->src_object, coding->dst_object))
1121     {
1122       /* The gap may contain the produced data at the head and not-yet
1123          consumed data at the tail.  To preserve those data, we at
1124          first make the gap size to zero, then increase the gap
1125          size.  */
1126       EMACS_INT add = GAP_SIZE;
1127
1128       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1129       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1130       make_gap (bytes);
1131       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1132       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1133     }
1134   else
1135     {
1136       Lisp_Object this_buffer;
1137
1138       this_buffer = Fcurrent_buffer ();
1139       set_buffer_internal (XBUFFER (coding->dst_object));
1140       make_gap (bytes);
1141       set_buffer_internal (XBUFFER (this_buffer));
1142     }
1143 }
1144
1145
1146 static unsigned char *
1147 alloc_destination (coding, nbytes, dst)
1148      struct coding_system *coding;
1149      EMACS_INT nbytes;
1150      unsigned char *dst;
1151 {
1152   EMACS_INT offset = dst - coding->destination;
1153
1154   if (BUFFERP (coding->dst_object))
1155     {
1156       struct buffer *buf = XBUFFER (coding->dst_object);
1157
1158       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1159     }
1160   else
1161     coding_alloc_by_realloc (coding, nbytes);
1162   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1163   coding_set_destination (coding);
1164   dst = coding->destination + offset;
1165   return dst;
1166 }
1167
1168 /** Macros for annotations.  */
1169
1170 /* Maximum length of annotation data (sum of annotations for
1171    composition and charset).  */
1172 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1173
1174 /* An annotation data is stored in the array coding->charbuf in this
1175    format:
1176      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1177    LENGTH is the number of elements in the annotation.
1178    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1179    NCHARS is the number of characters in the text annotated.
1180
1181    The format of the following elements depend on ANNOTATION_MASK.
1182
1183    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1184    follows:
1185      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1186    METHOD is one of enum composition_method.
1187    Optionnal COMPOSITION-COMPONENTS are characters and composition
1188    rules.
1189
1190    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1191    follows.  */
1192
1193 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1194   do {                                                  \
1195     *(buf)++ = -(len);                                  \
1196     *(buf)++ = (mask);                                  \
1197     *(buf)++ = (nchars);                                \
1198     coding->annotated = 1;                              \
1199   } while (0);
1200
1201 #define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
1202   do {                                                                      \
1203     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1204     *buf++ = method;                                                        \
1205   } while (0)
1206
1207
1208 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1209   do {                                                                  \
1210     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1211     *buf++ = id;                                                        \
1212   } while (0)
1213
1214 \f
1215 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1216
1217
1218
1219 \f
1220 /*** 3. UTF-8 ***/
1221
1222 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1223    Check if a text is encoded in UTF-8.  If it is, return 1, else
1224    return 0.  */
1225
1226 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1227 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1228 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1229 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1230 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1231 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1232
1233 #define UTF_BOM 0xFEFF
1234 #define UTF_8_BOM_1 0xEF
1235 #define UTF_8_BOM_2 0xBB
1236 #define UTF_8_BOM_3 0xBF
1237
1238 static int
1239 detect_coding_utf_8 (coding, detect_info)
1240      struct coding_system *coding;
1241      struct coding_detection_info *detect_info;
1242 {
1243   const unsigned char *src = coding->source, *src_base;
1244   const unsigned char *src_end = coding->source + coding->src_bytes;
1245   int multibytep = coding->src_multibyte;
1246   int consumed_chars = 0;
1247   int bom_found = 0;
1248   int found = 0;
1249
1250   detect_info->checked |= CATEGORY_MASK_UTF_8;
1251   /* A coding system of this category is always ASCII compatible.  */
1252   src += coding->head_ascii;
1253
1254   while (1)
1255     {
1256       int c, c1, c2, c3, c4;
1257
1258       src_base = src;
1259       ONE_MORE_BYTE (c);
1260       if (c < 0 || UTF_8_1_OCTET_P (c))
1261         continue;
1262       ONE_MORE_BYTE (c1);
1263       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1264         break;
1265       if (UTF_8_2_OCTET_LEADING_P (c))
1266         {
1267           found = 1;
1268           continue;
1269         }
1270       ONE_MORE_BYTE (c2);
1271       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1272         break;
1273       if (UTF_8_3_OCTET_LEADING_P (c))
1274         {
1275           found = 1;
1276           if (src_base == coding->source
1277               && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
1278             bom_found = 1;
1279           continue;
1280         }
1281       ONE_MORE_BYTE (c3);
1282       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1283         break;
1284       if (UTF_8_4_OCTET_LEADING_P (c))
1285         {
1286           found = 1;
1287           continue;
1288         }
1289       ONE_MORE_BYTE (c4);
1290       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1291         break;
1292       if (UTF_8_5_OCTET_LEADING_P (c))
1293         {
1294           found = 1;
1295           continue;
1296         }
1297       break;
1298     }
1299   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1300   return 0;
1301
1302  no_more_source:
1303   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1304     {
1305       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1306       return 0;
1307     }
1308   if (bom_found)
1309     {
1310       /* The first character 0xFFFE doesn't necessarily mean a BOM.  */
1311       detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
1312     }
1313   else
1314     {
1315       detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
1316       if (found)
1317         detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
1318     }
1319   return 1;
1320 }
1321
1322
1323 static void
1324 decode_coding_utf_8 (coding)
1325      struct coding_system *coding;
1326 {
1327   const unsigned char *src = coding->source + coding->consumed;
1328   const unsigned char *src_end = coding->source + coding->src_bytes;
1329   const unsigned char *src_base;
1330   int *charbuf = coding->charbuf + coding->charbuf_used;
1331   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1332   int consumed_chars = 0, consumed_chars_base = 0;
1333   int multibytep = coding->src_multibyte;
1334   enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
1335   Lisp_Object attr, charset_list;
1336   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1337   int byte_after_cr = -1;
1338
1339   CODING_GET_INFO (coding, attr, charset_list);
1340
1341   if (bom != utf_without_bom)
1342     {
1343       int c1, c2, c3;
1344
1345       src_base = src;
1346       ONE_MORE_BYTE (c1);
1347       if (! UTF_8_3_OCTET_LEADING_P (c1))
1348         src = src_base;
1349       else
1350         {
1351           ONE_MORE_BYTE (c2);
1352           if (! UTF_8_EXTRA_OCTET_P (c2))
1353             src = src_base;
1354           else
1355             {
1356               ONE_MORE_BYTE (c3);
1357               if (! UTF_8_EXTRA_OCTET_P (c3))
1358                 src = src_base;
1359               else
1360                 {
1361                   if ((c1 != UTF_8_BOM_1)
1362                       || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
1363                     src = src_base;
1364                   else
1365                     CODING_UTF_8_BOM (coding) = utf_without_bom;
1366                 }
1367             }
1368         }
1369     }
1370   CODING_UTF_8_BOM (coding) = utf_without_bom;
1371
1372
1373
1374   while (1)
1375     {
1376       int c, c1, c2, c3, c4, c5;
1377
1378       src_base = src;
1379       consumed_chars_base = consumed_chars;
1380
1381       if (charbuf >= charbuf_end)
1382         {
1383           if (byte_after_cr >= 0)
1384             src_base--;
1385           break;
1386         }
1387
1388       if (byte_after_cr >= 0)
1389         c1 = byte_after_cr, byte_after_cr = -1;
1390       else
1391         ONE_MORE_BYTE (c1);
1392       if (c1 < 0)
1393         {
1394           c = - c1;
1395         }
1396       else if (UTF_8_1_OCTET_P(c1))
1397         {
1398           if (eol_crlf && c1 == '\r')
1399             ONE_MORE_BYTE (byte_after_cr);
1400           c = c1;
1401         }
1402       else
1403         {
1404           ONE_MORE_BYTE (c2);
1405           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1406             goto invalid_code;
1407           if (UTF_8_2_OCTET_LEADING_P (c1))
1408             {
1409               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1410               /* Reject overlong sequences here and below.  Encoders
1411                  producing them are incorrect, they can be misleading,
1412                  and they mess up read/write invariance.  */
1413               if (c < 128)
1414                 goto invalid_code;
1415             }
1416           else
1417             {
1418               ONE_MORE_BYTE (c3);
1419               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1420                 goto invalid_code;
1421               if (UTF_8_3_OCTET_LEADING_P (c1))
1422                 {
1423                   c = (((c1 & 0xF) << 12)
1424                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1425                   if (c < 0x800
1426                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1427                     goto invalid_code;
1428                 }
1429               else
1430                 {
1431                   ONE_MORE_BYTE (c4);
1432                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1433                     goto invalid_code;
1434                   if (UTF_8_4_OCTET_LEADING_P (c1))
1435                     {
1436                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1437                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1438                     if (c < 0x10000)
1439                       goto invalid_code;
1440                     }
1441                   else
1442                     {
1443                       ONE_MORE_BYTE (c5);
1444                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1445                         goto invalid_code;
1446                       if (UTF_8_5_OCTET_LEADING_P (c1))
1447                         {
1448                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1449                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1450                                | (c5 & 0x3F));
1451                           if ((c > MAX_CHAR) || (c < 0x200000))
1452                             goto invalid_code;
1453                         }
1454                       else
1455                         goto invalid_code;
1456                     }
1457                 }
1458             }
1459         }
1460
1461       *charbuf++ = c;
1462       continue;
1463
1464     invalid_code:
1465       src = src_base;
1466       consumed_chars = consumed_chars_base;
1467       ONE_MORE_BYTE (c);
1468       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1469       coding->errors++;
1470     }
1471
1472  no_more_source:
1473   coding->consumed_char += consumed_chars_base;
1474   coding->consumed = src_base - coding->source;
1475   coding->charbuf_used = charbuf - coding->charbuf;
1476 }
1477
1478
1479 static int
1480 encode_coding_utf_8 (coding)
1481      struct coding_system *coding;
1482 {
1483   int multibytep = coding->dst_multibyte;
1484   int *charbuf = coding->charbuf;
1485   int *charbuf_end = charbuf + coding->charbuf_used;
1486   unsigned char *dst = coding->destination + coding->produced;
1487   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1488   int produced_chars = 0;
1489   int c;
1490
1491   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1492     {
1493       ASSURE_DESTINATION (3);
1494       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
1495       CODING_UTF_8_BOM (coding) = utf_without_bom;
1496     }
1497
1498   if (multibytep)
1499     {
1500       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1501
1502       while (charbuf < charbuf_end)
1503         {
1504           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1505
1506           ASSURE_DESTINATION (safe_room);
1507           c = *charbuf++;
1508           if (CHAR_BYTE8_P (c))
1509             {
1510               c = CHAR_TO_BYTE8 (c);
1511               EMIT_ONE_BYTE (c);
1512             }
1513           else
1514             {
1515               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1516               for (p = str; p < pend; p++)
1517                 EMIT_ONE_BYTE (*p);
1518             }
1519         }
1520     }
1521   else
1522     {
1523       int safe_room = MAX_MULTIBYTE_LENGTH;
1524
1525       while (charbuf < charbuf_end)
1526         {
1527           ASSURE_DESTINATION (safe_room);
1528           c = *charbuf++;
1529           if (CHAR_BYTE8_P (c))
1530             *dst++ = CHAR_TO_BYTE8 (c);
1531           else
1532             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1533           produced_chars++;
1534         }
1535     }
1536   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1537   coding->produced_char += produced_chars;
1538   coding->produced = dst - coding->destination;
1539   return 0;
1540 }
1541
1542
1543 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1544    Check if a text is encoded in one of UTF-16 based coding systems.
1545    If it is, return 1, else return 0.  */
1546
1547 #define UTF_16_HIGH_SURROGATE_P(val) \
1548   (((val) & 0xFC00) == 0xD800)
1549
1550 #define UTF_16_LOW_SURROGATE_P(val) \
1551   (((val) & 0xFC00) == 0xDC00)
1552
1553 #define UTF_16_INVALID_P(val)   \
1554   (((val) == 0xFFFE)            \
1555    || ((val) == 0xFFFF)         \
1556    || UTF_16_LOW_SURROGATE_P (val))
1557
1558
1559 static int
1560 detect_coding_utf_16 (coding, detect_info)
1561      struct coding_system *coding;
1562      struct coding_detection_info *detect_info;
1563 {
1564   const unsigned char *src = coding->source, *src_base = src;
1565   const unsigned char *src_end = coding->source + coding->src_bytes;
1566   int multibytep = coding->src_multibyte;
1567   int consumed_chars = 0;
1568   int c1, c2;
1569
1570   detect_info->checked |= CATEGORY_MASK_UTF_16;
1571   if (coding->mode & CODING_MODE_LAST_BLOCK
1572       && (coding->src_chars & 1))
1573     {
1574       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1575       return 0;
1576     }
1577
1578   ONE_MORE_BYTE (c1);
1579   ONE_MORE_BYTE (c2);
1580   if ((c1 == 0xFF) && (c2 == 0xFE))
1581     {
1582       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1583                              | CATEGORY_MASK_UTF_16_AUTO);
1584       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1585                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1586                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1587     }
1588   else if ((c1 == 0xFE) && (c2 == 0xFF))
1589     {
1590       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1591                              | CATEGORY_MASK_UTF_16_AUTO);
1592       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1593                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1594                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1595     }
1596   else
1597     {
1598       /* We check the dispersion of Eth and Oth bytes where E is even and
1599          O is odd.  If both are high, we assume binary data.*/
1600       unsigned char e[256], o[256];
1601       unsigned e_num = 1, o_num = 1;
1602
1603       memset (e, 0, 256);
1604       memset (o, 0, 256);
1605       e[c1] = 1;
1606       o[c2] = 1;
1607
1608       detect_info->rejected
1609         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1610
1611       while (1)
1612         {
1613           ONE_MORE_BYTE (c1);
1614           ONE_MORE_BYTE (c2);
1615           if (! e[c1])
1616             {
1617               e[c1] = 1;
1618               e_num++;
1619               if (e_num >= 128)
1620                 break;
1621             }
1622           if (! o[c2])
1623             {
1624               o[c1] = 1;
1625               o_num++;
1626               if (o_num >= 128)
1627                 break;
1628             }
1629         }
1630       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1631       return 0;
1632     }
1633
1634  no_more_source:
1635   return 1;
1636 }
1637
1638 static void
1639 decode_coding_utf_16 (coding)
1640      struct coding_system *coding;
1641 {
1642   const unsigned char *src = coding->source + coding->consumed;
1643   const unsigned char *src_end = coding->source + coding->src_bytes;
1644   const unsigned char *src_base;
1645   int *charbuf = coding->charbuf + coding->charbuf_used;
1646   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1647   int consumed_chars = 0, consumed_chars_base = 0;
1648   int multibytep = coding->src_multibyte;
1649   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1650   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1651   int surrogate = CODING_UTF_16_SURROGATE (coding);
1652   Lisp_Object attr, charset_list;
1653   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1654   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1655
1656   CODING_GET_INFO (coding, attr, charset_list);
1657
1658   if (bom == utf_with_bom)
1659     {
1660       int c, c1, c2;
1661
1662       src_base = src;
1663       ONE_MORE_BYTE (c1);
1664       ONE_MORE_BYTE (c2);
1665       c = (c1 << 8) | c2;
1666
1667       if (endian == utf_16_big_endian
1668           ? c != 0xFEFF : c != 0xFFFE)
1669         {
1670           /* The first two bytes are not BOM.  Treat them as bytes
1671              for a normal character.  */
1672           src = src_base;
1673           coding->errors++;
1674         }
1675       CODING_UTF_16_BOM (coding) = utf_without_bom;
1676     }
1677   else if (bom == utf_detect_bom)
1678     {
1679       /* We have already tried to detect BOM and failed in
1680          detect_coding.  */
1681       CODING_UTF_16_BOM (coding) = utf_without_bom;
1682     }
1683
1684   while (1)
1685     {
1686       int c, c1, c2;
1687
1688       src_base = src;
1689       consumed_chars_base = consumed_chars;
1690
1691       if (charbuf + 2 >= charbuf_end)
1692         {
1693           if (byte_after_cr1 >= 0)
1694             src_base -= 2;
1695           break;
1696         }
1697
1698       if (byte_after_cr1 >= 0)
1699         c1 = byte_after_cr1, byte_after_cr1 = -1;
1700       else
1701         ONE_MORE_BYTE (c1);
1702       if (c1 < 0)
1703         {
1704           *charbuf++ = -c1;
1705           continue;
1706         }
1707       if (byte_after_cr2 >= 0)
1708         c2 = byte_after_cr2, byte_after_cr2 = -1;
1709       else
1710         ONE_MORE_BYTE (c2);
1711       if (c2 < 0)
1712         {
1713           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1714           *charbuf++ = -c2;
1715           continue;
1716         }
1717       c = (endian == utf_16_big_endian
1718            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1719
1720       if (surrogate)
1721         {
1722           if (! UTF_16_LOW_SURROGATE_P (c))
1723             {
1724               if (endian == utf_16_big_endian)
1725                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1726               else
1727                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1728               *charbuf++ = c1;
1729               *charbuf++ = c2;
1730               coding->errors++;
1731               if (UTF_16_HIGH_SURROGATE_P (c))
1732                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1733               else
1734                 *charbuf++ = c;
1735             }
1736           else
1737             {
1738               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1739               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1740               *charbuf++ = 0x10000 + c;
1741             }
1742         }
1743       else
1744         {
1745           if (UTF_16_HIGH_SURROGATE_P (c))
1746             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1747           else
1748             {
1749               if (eol_crlf && c == '\r')
1750                 {
1751                   ONE_MORE_BYTE (byte_after_cr1);
1752                   ONE_MORE_BYTE (byte_after_cr2);
1753                 }
1754               *charbuf++ = c;
1755             }
1756         }
1757     }
1758
1759  no_more_source:
1760   coding->consumed_char += consumed_chars_base;
1761   coding->consumed = src_base - coding->source;
1762   coding->charbuf_used = charbuf - coding->charbuf;
1763 }
1764
1765 static int
1766 encode_coding_utf_16 (coding)
1767      struct coding_system *coding;
1768 {
1769   int multibytep = coding->dst_multibyte;
1770   int *charbuf = coding->charbuf;
1771   int *charbuf_end = charbuf + coding->charbuf_used;
1772   unsigned char *dst = coding->destination + coding->produced;
1773   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1774   int safe_room = 8;
1775   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1776   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1777   int produced_chars = 0;
1778   Lisp_Object attrs, charset_list;
1779   int c;
1780
1781   CODING_GET_INFO (coding, attrs, charset_list);
1782
1783   if (bom != utf_without_bom)
1784     {
1785       ASSURE_DESTINATION (safe_room);
1786       if (big_endian)
1787         EMIT_TWO_BYTES (0xFE, 0xFF);
1788       else
1789         EMIT_TWO_BYTES (0xFF, 0xFE);
1790       CODING_UTF_16_BOM (coding) = utf_without_bom;
1791     }
1792
1793   while (charbuf < charbuf_end)
1794     {
1795       ASSURE_DESTINATION (safe_room);
1796       c = *charbuf++;
1797       if (c >= MAX_UNICODE_CHAR)
1798         c = coding->default_char;
1799
1800       if (c < 0x10000)
1801         {
1802           if (big_endian)
1803             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1804           else
1805             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1806         }
1807       else
1808         {
1809           int c1, c2;
1810
1811           c -= 0x10000;
1812           c1 = (c >> 10) + 0xD800;
1813           c2 = (c & 0x3FF) + 0xDC00;
1814           if (big_endian)
1815             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1816           else
1817             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1818         }
1819     }
1820   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1821   coding->produced = dst - coding->destination;
1822   coding->produced_char += produced_chars;
1823   return 0;
1824 }
1825
1826 \f
1827 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1828
1829 /* Emacs' internal format for representation of multiple character
1830    sets is a kind of multi-byte encoding, i.e. characters are
1831    represented by variable-length sequences of one-byte codes.
1832
1833    ASCII characters and control characters (e.g. `tab', `newline') are
1834    represented by one-byte sequences which are their ASCII codes, in
1835    the range 0x00 through 0x7F.
1836
1837    8-bit characters of the range 0x80..0x9F are represented by
1838    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1839    code + 0x20).
1840
1841    8-bit characters of the range 0xA0..0xFF are represented by
1842    one-byte sequences which are their 8-bit code.
1843
1844    The other characters are represented by a sequence of `base
1845    leading-code', optional `extended leading-code', and one or two
1846    `position-code's.  The length of the sequence is determined by the
1847    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1848    whereas extended leading-code and position-code take the range 0xA0
1849    through 0xFF.  See `charset.h' for more details about leading-code
1850    and position-code.
1851
1852    --- CODE RANGE of Emacs' internal format ---
1853    character set        range
1854    -------------        -----
1855    ascii                0x00..0x7F
1856    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1857    eight-bit-graphic    0xA0..0xBF
1858    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1859    ---------------------------------------------
1860
1861    As this is the internal character representation, the format is
1862    usually not used externally (i.e. in a file or in a data sent to a
1863    process).  But, it is possible to have a text externally in this
1864    format (i.e. by encoding by the coding system `emacs-mule').
1865
1866    In that case, a sequence of one-byte codes has a slightly different
1867    form.
1868
1869    At first, all characters in eight-bit-control are represented by
1870    one-byte sequences which are their 8-bit code.
1871
1872    Next, character composition data are represented by the byte
1873    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1874    where,
1875         METHOD is 0xF0 plus one of composition method (enum
1876         composition_method),
1877
1878         BYTES is 0xA0 plus a byte length of this composition data,
1879
1880         CHARS is 0x20 plus a number of characters composed by this
1881         data,
1882
1883         COMPONENTs are characters of multibye form or composition
1884         rules encoded by two-byte of ASCII codes.
1885
1886    In addition, for backward compatibility, the following formats are
1887    also recognized as composition data on decoding.
1888
1889    0x80 MSEQ ...
1890    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1891
1892    Here,
1893         MSEQ is a multibyte form but in these special format:
1894           ASCII: 0xA0 ASCII_CODE+0x80,
1895           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1896         RULE is a one byte code of the range 0xA0..0xF0 that
1897         represents a composition rule.
1898   */
1899
1900 char emacs_mule_bytes[256];
1901
1902 int
1903 emacs_mule_char (coding, src, nbytes, nchars, id)
1904      struct coding_system *coding;
1905      const unsigned char *src;
1906      int *nbytes, *nchars, *id;
1907 {
1908   const unsigned char *src_end = coding->source + coding->src_bytes;
1909   const unsigned char *src_base = src;
1910   int multibytep = coding->src_multibyte;
1911   struct charset *charset;
1912   unsigned code;
1913   int c;
1914   int consumed_chars = 0;
1915
1916   ONE_MORE_BYTE (c);
1917   if (c < 0)
1918     {
1919       c = -c;
1920       charset = emacs_mule_charset[0];
1921     }
1922   else
1923     {
1924       if (c >= 0xA0)
1925         {
1926           /* Old style component character of a composition.  */
1927           if (c == 0xA0)
1928             {
1929               ONE_MORE_BYTE (c);
1930               c -= 0x80;
1931             }
1932           else
1933             c -= 0x20;
1934         }
1935
1936       switch (emacs_mule_bytes[c])
1937         {
1938         case 2:
1939           if (! (charset = emacs_mule_charset[c]))
1940             goto invalid_code;
1941           ONE_MORE_BYTE (c);
1942           if (c < 0xA0)
1943             goto invalid_code;
1944           code = c & 0x7F;
1945           break;
1946
1947         case 3:
1948           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1949               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1950             {
1951               ONE_MORE_BYTE (c);
1952               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1953                 goto invalid_code;
1954               ONE_MORE_BYTE (c);
1955               if (c < 0xA0)
1956                 goto invalid_code;
1957               code = c & 0x7F;
1958             }
1959           else
1960             {
1961               if (! (charset = emacs_mule_charset[c]))
1962                 goto invalid_code;
1963               ONE_MORE_BYTE (c);
1964               if (c < 0xA0)
1965                 goto invalid_code;
1966               code = (c & 0x7F) << 8;
1967               ONE_MORE_BYTE (c);
1968               if (c < 0xA0)
1969                 goto invalid_code;
1970               code |= c & 0x7F;
1971             }
1972           break;
1973
1974         case 4:
1975           ONE_MORE_BYTE (c);
1976           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1977             goto invalid_code;
1978           ONE_MORE_BYTE (c);
1979           if (c < 0xA0)
1980             goto invalid_code;
1981           code = (c & 0x7F) << 8;
1982           ONE_MORE_BYTE (c);
1983           if (c < 0xA0)
1984             goto invalid_code;
1985           code |= c & 0x7F;
1986           break;
1987
1988         case 1:
1989           code = c;
1990           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1991                                      ? charset_ascii : charset_eight_bit);
1992           break;
1993
1994         default:
1995           abort ();
1996         }
1997       c = DECODE_CHAR (charset, code);
1998       if (c < 0)
1999         goto invalid_code;
2000     }
2001   *nbytes = src - src_base;
2002   *nchars = consumed_chars;
2003   if (id)
2004     *id = charset->id;
2005   return c;
2006
2007  no_more_source:
2008   return -2;
2009
2010  invalid_code:
2011   return -1;
2012 }
2013
2014
2015 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2016    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
2017    else return 0.  */
2018
2019 static int
2020 detect_coding_emacs_mule (coding, detect_info)
2021      struct coding_system *coding;
2022      struct coding_detection_info *detect_info;
2023 {
2024   const unsigned char *src = coding->source, *src_base;
2025   const unsigned char *src_end = coding->source + coding->src_bytes;
2026   int multibytep = coding->src_multibyte;
2027   int consumed_chars = 0;
2028   int c;
2029   int found = 0;
2030
2031   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
2032   /* A coding system of this category is always ASCII compatible.  */
2033   src += coding->head_ascii;
2034
2035   while (1)
2036     {
2037       src_base = src;
2038       ONE_MORE_BYTE (c);
2039       if (c < 0)
2040         continue;
2041       if (c == 0x80)
2042         {
2043           /* Perhaps the start of composite character.  We simple skip
2044              it because analyzing it is too heavy for detecting.  But,
2045              at least, we check that the composite character
2046              constitutes of more than 4 bytes.  */
2047           const unsigned char *src_base;
2048
2049         repeat:
2050           src_base = src;
2051           do
2052             {
2053               ONE_MORE_BYTE (c);
2054             }
2055           while (c >= 0xA0);
2056
2057           if (src - src_base <= 4)
2058             break;
2059           found = CATEGORY_MASK_EMACS_MULE;
2060           if (c == 0x80)
2061             goto repeat;
2062         }
2063
2064       if (c < 0x80)
2065         {
2066           if (c < 0x20
2067               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2068             break;
2069         }
2070       else
2071         {
2072           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2073
2074           while (more_bytes > 0)
2075             {
2076               ONE_MORE_BYTE (c);
2077               if (c < 0xA0)
2078                 {
2079                   src--;        /* Unread the last byte.  */
2080                   break;
2081                 }
2082               more_bytes--;
2083             }
2084           if (more_bytes != 0)
2085             break;
2086           found = CATEGORY_MASK_EMACS_MULE;
2087         }
2088     }
2089   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2090   return 0;
2091
2092  no_more_source:
2093   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2094     {
2095       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2096       return 0;
2097     }
2098   detect_info->found |= found;
2099   return 1;
2100 }
2101
2102
2103 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2104
2105 /* Decode a character represented as a component of composition
2106    sequence of Emacs 20/21 style at SRC.  Set C to that character and
2107    update SRC to the head of next character (or an encoded composition
2108    rule).  If SRC doesn't points a composition component, set C to -1.
2109    If SRC points an invalid byte sequence, global exit by a return
2110    value 0.  */
2111
2112 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
2113   do                                                            \
2114     {                                                           \
2115       int c;                                                    \
2116       int nbytes, nchars;                                       \
2117                                                                 \
2118       if (src == src_end)                                       \
2119         break;                                                  \
2120       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
2121       if (c < 0)                                                \
2122         {                                                       \
2123           if (c == -2)                                          \
2124             break;                                              \
2125           goto invalid_code;                                    \
2126         }                                                       \
2127       *buf++ = c;                                               \
2128       src += nbytes;                                            \
2129       consumed_chars += nchars;                                 \
2130     }                                                           \
2131   while (0)
2132
2133
2134 /* Decode a composition rule represented as a component of composition
2135    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
2136    and increment BUF.  If SRC points an invalid byte sequence, set C
2137    to -1.  */
2138
2139 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
2140   do {                                                  \
2141     int c, gref, nref;                                  \
2142                                                         \
2143     if (src >= src_end)                                 \
2144       goto invalid_code;                                \
2145     ONE_MORE_BYTE_NO_CHECK (c);                         \
2146     c -= 0xA0;                                          \
2147     if (c < 0 || c >= 81)                               \
2148       goto invalid_code;                                \
2149                                                         \
2150     gref = c / 9, nref = c % 9;                         \
2151     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2152   } while (0)
2153
2154
2155 /* Decode a composition rule represented as a component of composition
2156    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
2157    and increment BUF.  If SRC points an invalid byte sequence, set C
2158    to -1.  */
2159
2160 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
2161   do {                                                  \
2162     int gref, nref;                                     \
2163                                                         \
2164     if (src + 1>= src_end)                              \
2165       goto invalid_code;                                \
2166     ONE_MORE_BYTE_NO_CHECK (gref);                      \
2167     gref -= 0x20;                                       \
2168     ONE_MORE_BYTE_NO_CHECK (nref);                      \
2169     nref -= 0x20;                                       \
2170     if (gref < 0 || gref >= 81                          \
2171         || nref < 0 || nref >= 81)                      \
2172       goto invalid_code;                                \
2173     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2174   } while (0)
2175
2176
2177 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
2178   do {                                                                  \
2179     /* Emacs 21 style format.  The first three bytes at SRC are         \
2180        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
2181        the byte length of this composition information, CHARS is the    \
2182        number of characters composed by this composition.  */           \
2183     enum composition_method method = c - 0xF2;                          \
2184     int *charbuf_base = charbuf;                                        \
2185     int consumed_chars_limit;                                           \
2186     int nbytes, nchars;                                                 \
2187                                                                         \
2188     ONE_MORE_BYTE (c);                                                  \
2189     if (c < 0)                                                          \
2190       goto invalid_code;                                                \
2191     nbytes = c - 0xA0;                                                  \
2192     if (nbytes < 3)                                                     \
2193       goto invalid_code;                                                \
2194     ONE_MORE_BYTE (c);                                                  \
2195     if (c < 0)                                                          \
2196       goto invalid_code;                                                \
2197     nchars = c - 0xA0;                                                  \
2198     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2199     consumed_chars_limit = consumed_chars_base + nbytes;                \
2200     if (method != COMPOSITION_RELATIVE)                                 \
2201       {                                                                 \
2202         int i = 0;                                                      \
2203         while (consumed_chars < consumed_chars_limit)                   \
2204           {                                                             \
2205             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
2206               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
2207             else                                                        \
2208               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
2209             i++;                                                        \
2210           }                                                             \
2211         if (consumed_chars < consumed_chars_limit)                      \
2212           goto invalid_code;                                            \
2213         charbuf_base[0] -= i;                                           \
2214       }                                                                 \
2215   } while (0)
2216
2217
2218 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
2219   do {                                                                  \
2220     /* Emacs 20 style format for relative composition.  */              \
2221     /* Store multibyte form of characters to be composed.  */           \
2222     enum composition_method method = COMPOSITION_RELATIVE;              \
2223     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
2224     int *buf = components;                                              \
2225     int i, j;                                                           \
2226                                                                         \
2227     src = src_base;                                                     \
2228     ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
2229     for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++)    \
2230       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
2231     if (i < 2)                                                          \
2232       goto invalid_code;                                                \
2233     ADD_COMPOSITION_DATA (charbuf, i, method);                          \
2234     for (j = 0; j < i; j++)                                             \
2235       *charbuf++ = components[j];                                       \
2236   } while (0)
2237
2238
2239 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
2240   do {                                                          \
2241     /* Emacs 20 style format for rule-base composition.  */     \
2242     /* Store multibyte form of characters to be composed.  */   \
2243     enum composition_method method = COMPOSITION_WITH_RULE;     \
2244     int *charbuf_base = charbuf;                                \
2245     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
2246     int *buf = components;                                      \
2247     int i, j;                                                   \
2248                                                                 \
2249     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
2250     for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2251       {                                                         \
2252         if (*src < 0xA0)                                        \
2253           break;                                                \
2254         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
2255         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
2256       }                                                         \
2257     if (i <= 1 || (buf - components) % 2 == 0)                  \
2258       goto invalid_code;                                        \
2259     if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
2260       goto no_more_source;                                      \
2261     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
2262     i = i * 2 - 1;                                              \
2263     for (j = 0; j < i; j++)                                     \
2264       *charbuf++ = components[j];                               \
2265     charbuf_base[0] -= i;                                       \
2266     for (j = 0; j < i; j += 2)                                  \
2267       *charbuf++ = components[j];                               \
2268   } while (0)
2269
2270
2271 static void
2272 decode_coding_emacs_mule (coding)
2273      struct coding_system *coding;
2274 {
2275   const unsigned char *src = coding->source + coding->consumed;
2276   const unsigned char *src_end = coding->source + coding->src_bytes;
2277   const unsigned char *src_base;
2278   int *charbuf = coding->charbuf + coding->charbuf_used;
2279   int *charbuf_end
2280     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2281   int consumed_chars = 0, consumed_chars_base;
2282   int multibytep = coding->src_multibyte;
2283   Lisp_Object attrs, charset_list;
2284   int char_offset = coding->produced_char;
2285   int last_offset = char_offset;
2286   int last_id = charset_ascii;
2287   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2288   int byte_after_cr = -1;
2289
2290   CODING_GET_INFO (coding, attrs, charset_list);
2291
2292   while (1)
2293     {
2294       int c;
2295
2296       src_base = src;
2297       consumed_chars_base = consumed_chars;
2298
2299       if (charbuf >= charbuf_end)
2300         {
2301           if (byte_after_cr >= 0)
2302             src_base--;
2303           break;
2304         }
2305
2306       if (byte_after_cr >= 0)
2307         c = byte_after_cr, byte_after_cr = -1;
2308       else
2309         ONE_MORE_BYTE (c);
2310       if (c < 0)
2311         {
2312           *charbuf++ = -c;
2313           char_offset++;
2314         }
2315       else if (c < 0x80)
2316         {
2317           if (eol_crlf && c == '\r')
2318             ONE_MORE_BYTE (byte_after_cr);
2319           *charbuf++ = c;
2320           char_offset++;
2321         }
2322       else if (c == 0x80)
2323         {
2324           ONE_MORE_BYTE (c);
2325           if (c < 0)
2326             goto invalid_code;
2327           if (c - 0xF2 >= COMPOSITION_RELATIVE
2328               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2329             DECODE_EMACS_MULE_21_COMPOSITION (c);
2330           else if (c < 0xC0)
2331             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2332           else if (c == 0xFF)
2333             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2334           else
2335             goto invalid_code;
2336         }
2337       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2338         {
2339           int nbytes, nchars;
2340           int id;
2341
2342           src = src_base;
2343           consumed_chars = consumed_chars_base;
2344           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2345           if (c < 0)
2346             {
2347               if (c == -2)
2348                 break;
2349               goto invalid_code;
2350             }
2351           if (last_id != id)
2352             {
2353               if (last_id != charset_ascii)
2354                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2355               last_id = id;
2356               last_offset = char_offset;
2357             }
2358           *charbuf++ = c;
2359           src += nbytes;
2360           consumed_chars += nchars;
2361           char_offset++;
2362         }
2363       else
2364         goto invalid_code;
2365       continue;
2366
2367     invalid_code:
2368       src = src_base;
2369       consumed_chars = consumed_chars_base;
2370       ONE_MORE_BYTE (c);
2371       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2372       char_offset++;
2373       coding->errors++;
2374     }
2375
2376  no_more_source:
2377   if (last_id != charset_ascii)
2378     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2379   coding->consumed_char += consumed_chars_base;
2380   coding->consumed = src_base - coding->source;
2381   coding->charbuf_used = charbuf - coding->charbuf;
2382 }
2383
2384
2385 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2386   do {                                          \
2387     if (id < 0xA0)                              \
2388       codes[0] = id, codes[1] = 0;              \
2389     else if (id < 0xE0)                         \
2390       codes[0] = 0x9A, codes[1] = id;           \
2391     else if (id < 0xF0)                         \
2392       codes[0] = 0x9B, codes[1] = id;           \
2393     else if (id < 0xF5)                         \
2394       codes[0] = 0x9C, codes[1] = id;           \
2395     else                                        \
2396       codes[0] = 0x9D, codes[1] = id;           \
2397   } while (0);
2398
2399
2400 static int
2401 encode_coding_emacs_mule (coding)
2402      struct coding_system *coding;
2403 {
2404   int multibytep = coding->dst_multibyte;
2405   int *charbuf = coding->charbuf;
2406   int *charbuf_end = charbuf + coding->charbuf_used;
2407   unsigned char *dst = coding->destination + coding->produced;
2408   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2409   int safe_room = 8;
2410   int produced_chars = 0;
2411   Lisp_Object attrs, charset_list;
2412   int c;
2413   int preferred_charset_id = -1;
2414
2415   CODING_GET_INFO (coding, attrs, charset_list);
2416   if (! EQ (charset_list, Vemacs_mule_charset_list))
2417     {
2418       CODING_ATTR_CHARSET_LIST (attrs)
2419         = charset_list = Vemacs_mule_charset_list;
2420     }
2421
2422   while (charbuf < charbuf_end)
2423     {
2424       ASSURE_DESTINATION (safe_room);
2425       c = *charbuf++;
2426
2427       if (c < 0)
2428         {
2429           /* Handle an annotation.  */
2430           switch (*charbuf)
2431             {
2432             case CODING_ANNOTATE_COMPOSITION_MASK:
2433               /* Not yet implemented.  */
2434               break;
2435             case CODING_ANNOTATE_CHARSET_MASK:
2436               preferred_charset_id = charbuf[3];
2437               if (preferred_charset_id >= 0
2438                   && NILP (Fmemq (make_number (preferred_charset_id),
2439                                   charset_list)))
2440                 preferred_charset_id = -1;
2441               break;
2442             default:
2443               abort ();
2444             }
2445           charbuf += -c - 1;
2446           continue;
2447         }
2448
2449       if (ASCII_CHAR_P (c))
2450         EMIT_ONE_ASCII_BYTE (c);
2451       else if (CHAR_BYTE8_P (c))
2452         {
2453           c = CHAR_TO_BYTE8 (c);
2454           EMIT_ONE_BYTE (c);
2455         }
2456       else
2457         {
2458           struct charset *charset;
2459           unsigned code;
2460           int dimension;
2461           int emacs_mule_id;
2462           unsigned char leading_codes[2];
2463
2464           if (preferred_charset_id >= 0)
2465             {
2466               charset = CHARSET_FROM_ID (preferred_charset_id);
2467               if (CHAR_CHARSET_P (c, charset))
2468                 code = ENCODE_CHAR (charset, c);
2469               else
2470                 charset = char_charset (c, charset_list, &code);
2471             }
2472           else
2473             charset = char_charset (c, charset_list, &code);
2474           if (! charset)
2475             {
2476               c = coding->default_char;
2477               if (ASCII_CHAR_P (c))
2478                 {
2479                   EMIT_ONE_ASCII_BYTE (c);
2480                   continue;
2481                 }
2482               charset = char_charset (c, charset_list, &code);
2483             }
2484           dimension = CHARSET_DIMENSION (charset);
2485           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2486           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2487           EMIT_ONE_BYTE (leading_codes[0]);
2488           if (leading_codes[1])
2489             EMIT_ONE_BYTE (leading_codes[1]);
2490           if (dimension == 1)
2491             EMIT_ONE_BYTE (code | 0x80);
2492           else
2493             {
2494               code |= 0x8080;
2495               EMIT_ONE_BYTE (code >> 8);
2496               EMIT_ONE_BYTE (code & 0xFF);
2497             }
2498         }
2499     }
2500   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2501   coding->produced_char += produced_chars;
2502   coding->produced = dst - coding->destination;
2503   return 0;
2504 }
2505
2506 \f
2507 /*** 7. ISO2022 handlers ***/
2508
2509 /* The following note describes the coding system ISO2022 briefly.
2510    Since the intention of this note is to help understand the
2511    functions in this file, some parts are NOT ACCURATE or are OVERLY
2512    SIMPLIFIED.  For thorough understanding, please refer to the
2513    original document of ISO2022.  This is equivalent to the standard
2514    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2515
2516    ISO2022 provides many mechanisms to encode several character sets
2517    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2518    is encoded using bytes less than 128.  This may make the encoded
2519    text a little bit longer, but the text passes more easily through
2520    several types of gateway, some of which strip off the MSB (Most
2521    Significant Bit).
2522
2523    There are two kinds of character sets: control character sets and
2524    graphic character sets.  The former contain control characters such
2525    as `newline' and `escape' to provide control functions (control
2526    functions are also provided by escape sequences).  The latter
2527    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2528    two control character sets and many graphic character sets.
2529
2530    Graphic character sets are classified into one of the following
2531    four classes, according to the number of bytes (DIMENSION) and
2532    number of characters in one dimension (CHARS) of the set:
2533    - DIMENSION1_CHARS94
2534    - DIMENSION1_CHARS96
2535    - DIMENSION2_CHARS94
2536    - DIMENSION2_CHARS96
2537
2538    In addition, each character set is assigned an identification tag,
2539    unique for each set, called the "final character" (denoted as <F>
2540    hereafter).  The <F> of each character set is decided by ECMA(*)
2541    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2542    (0x30..0x3F are for private use only).
2543
2544    Note (*): ECMA = European Computer Manufacturers Association
2545
2546    Here are examples of graphic character sets [NAME(<F>)]:
2547         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2548         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2549         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2550         o DIMENSION2_CHARS96 -- none for the moment
2551
2552    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2553         C0 [0x00..0x1F] -- control character plane 0
2554         GL [0x20..0x7F] -- graphic character plane 0
2555         C1 [0x80..0x9F] -- control character plane 1
2556         GR [0xA0..0xFF] -- graphic character plane 1
2557
2558    A control character set is directly designated and invoked to C0 or
2559    C1 by an escape sequence.  The most common case is that:
2560    - ISO646's  control character set is designated/invoked to C0, and
2561    - ISO6429's control character set is designated/invoked to C1,
2562    and usually these designations/invocations are omitted in encoded
2563    text.  In a 7-bit environment, only C0 can be used, and a control
2564    character for C1 is encoded by an appropriate escape sequence to
2565    fit into the environment.  All control characters for C1 are
2566    defined to have corresponding escape sequences.
2567
2568    A graphic character set is at first designated to one of four
2569    graphic registers (G0 through G3), then these graphic registers are
2570    invoked to GL or GR.  These designations and invocations can be
2571    done independently.  The most common case is that G0 is invoked to
2572    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2573    these invocations and designations are omitted in encoded text.
2574    In a 7-bit environment, only GL can be used.
2575
2576    When a graphic character set of CHARS94 is invoked to GL, codes
2577    0x20 and 0x7F of the GL area work as control characters SPACE and
2578    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2579    be used.
2580
2581    There are two ways of invocation: locking-shift and single-shift.
2582    With locking-shift, the invocation lasts until the next different
2583    invocation, whereas with single-shift, the invocation affects the
2584    following character only and doesn't affect the locking-shift
2585    state.  Invocations are done by the following control characters or
2586    escape sequences:
2587
2588    ----------------------------------------------------------------------
2589    abbrev  function                  cntrl escape seq   description
2590    ----------------------------------------------------------------------
2591    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2592    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2593    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2594    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2595    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2596    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2597    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2598    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2599    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2600    ----------------------------------------------------------------------
2601    (*) These are not used by any known coding system.
2602
2603    Control characters for these functions are defined by macros
2604    ISO_CODE_XXX in `coding.h'.
2605
2606    Designations are done by the following escape sequences:
2607    ----------------------------------------------------------------------
2608    escape sequence      description
2609    ----------------------------------------------------------------------
2610    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2611    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2612    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2613    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2614    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2615    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2616    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2617    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2618    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2619    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2620    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2621    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2622    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2623    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2624    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2625    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2626    ----------------------------------------------------------------------
2627
2628    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2629    of dimension 1, chars 94, and final character <F>, etc...
2630
2631    Note (*): Although these designations are not allowed in ISO2022,
2632    Emacs accepts them on decoding, and produces them on encoding
2633    CHARS96 character sets in a coding system which is characterized as
2634    7-bit environment, non-locking-shift, and non-single-shift.
2635
2636    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2637    '(' must be omitted.  We refer to this as "short-form" hereafter.
2638
2639    Now you may notice that there are a lot of ways of encoding the
2640    same multilingual text in ISO2022.  Actually, there exist many
2641    coding systems such as Compound Text (used in X11's inter client
2642    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2643    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2644    localized platforms), and all of these are variants of ISO2022.
2645
2646    In addition to the above, Emacs handles two more kinds of escape
2647    sequences: ISO6429's direction specification and Emacs' private
2648    sequence for specifying character composition.
2649
2650    ISO6429's direction specification takes the following form:
2651         o CSI ']'      -- end of the current direction
2652         o CSI '0' ']'  -- end of the current direction
2653         o CSI '1' ']'  -- start of left-to-right text
2654         o CSI '2' ']'  -- start of right-to-left text
2655    The control character CSI (0x9B: control sequence introducer) is
2656    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2657
2658    Character composition specification takes the following form:
2659         o ESC '0' -- start relative composition
2660         o ESC '1' -- end composition
2661         o ESC '2' -- start rule-base composition (*)
2662         o ESC '3' -- start relative composition with alternate chars  (**)
2663         o ESC '4' -- start rule-base composition with alternate chars  (**)
2664   Since these are not standard escape sequences of any ISO standard,
2665   the use of them with these meanings is restricted to Emacs only.
2666
2667   (*) This form is used only in Emacs 20.7 and older versions,
2668   but newer versions can safely decode it.
2669   (**) This form is used only in Emacs 21.1 and newer versions,
2670   and older versions can't decode it.
2671
2672   Here's a list of example usages of these composition escape
2673   sequences (categorized by `enum composition_method').
2674
2675   COMPOSITION_RELATIVE:
2676         ESC 0 CHAR [ CHAR ] ESC 1
2677   COMPOSITION_WITH_RULE:
2678         ESC 2 CHAR [ RULE CHAR ] ESC 1
2679   COMPOSITION_WITH_ALTCHARS:
2680         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2681   COMPOSITION_WITH_RULE_ALTCHARS:
2682         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2683
2684 enum iso_code_class_type iso_code_class[256];
2685
2686 #define SAFE_CHARSET_P(coding, id)      \
2687   ((id) <= (coding)->max_charset_id     \
2688    && (coding)->safe_charsets[id] >= 0)
2689
2690
2691 #define SHIFT_OUT_OK(category)  \
2692   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2693
2694 static void
2695 setup_iso_safe_charsets (attrs)
2696      Lisp_Object attrs;
2697 {
2698   Lisp_Object charset_list, safe_charsets;
2699   Lisp_Object request;
2700   Lisp_Object reg_usage;
2701   Lisp_Object tail;
2702   int reg94, reg96;
2703   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2704   int max_charset_id;
2705
2706   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2707   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2708       && ! EQ (charset_list, Viso_2022_charset_list))
2709     {
2710       CODING_ATTR_CHARSET_LIST (attrs)
2711         = charset_list = Viso_2022_charset_list;
2712       ASET (attrs, coding_attr_safe_charsets, Qnil);
2713     }
2714
2715   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2716     return;
2717
2718   max_charset_id = 0;
2719   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2720     {
2721       int id = XINT (XCAR (tail));
2722       if (max_charset_id < id)
2723         max_charset_id = id;
2724     }
2725
2726   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2727                                 make_number (255));
2728   request = AREF (attrs, coding_attr_iso_request);
2729   reg_usage = AREF (attrs, coding_attr_iso_usage);
2730   reg94 = XINT (XCAR (reg_usage));
2731   reg96 = XINT (XCDR (reg_usage));
2732
2733   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2734     {
2735       Lisp_Object id;
2736       Lisp_Object reg;
2737       struct charset *charset;
2738
2739       id = XCAR (tail);
2740       charset = CHARSET_FROM_ID (XINT (id));
2741       reg = Fcdr (Fassq (id, request));
2742       if (! NILP (reg))
2743         SSET (safe_charsets, XINT (id), XINT (reg));
2744       else if (charset->iso_chars_96)
2745         {
2746           if (reg96 < 4)
2747             SSET (safe_charsets, XINT (id), reg96);
2748         }
2749       else
2750         {
2751           if (reg94 < 4)
2752             SSET (safe_charsets, XINT (id), reg94);
2753         }
2754     }
2755   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2756 }
2757
2758
2759 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2760    Check if a text is encoded in one of ISO-2022 based codig systems.
2761    If it is, return 1, else return 0.  */
2762
2763 static int
2764 detect_coding_iso_2022 (coding, detect_info)
2765      struct coding_system *coding;
2766      struct coding_detection_info *detect_info;
2767 {
2768   const unsigned char *src = coding->source, *src_base = src;
2769   const unsigned char *src_end = coding->source + coding->src_bytes;
2770   int multibytep = coding->src_multibyte;
2771   int single_shifting = 0;
2772   int id;
2773   int c, c1;
2774   int consumed_chars = 0;
2775   int i;
2776   int rejected = 0;
2777   int found = 0;
2778   int composition_count = -1;
2779
2780   detect_info->checked |= CATEGORY_MASK_ISO;
2781
2782   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2783     {
2784       struct coding_system *this = &(coding_categories[i]);
2785       Lisp_Object attrs, val;
2786
2787       if (this->id < 0)
2788         continue;
2789       attrs = CODING_ID_ATTRS (this->id);
2790       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2791           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2792         setup_iso_safe_charsets (attrs);
2793       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2794       this->max_charset_id = SCHARS (val) - 1;
2795       this->safe_charsets = (char *) SDATA (val);
2796     }
2797
2798   /* A coding system of this category is always ASCII compatible.  */
2799   src += coding->head_ascii;
2800
2801   while (rejected != CATEGORY_MASK_ISO)
2802     {
2803       src_base = src;
2804       ONE_MORE_BYTE (c);
2805       switch (c)
2806         {
2807         case ISO_CODE_ESC:
2808           if (inhibit_iso_escape_detection)
2809             break;
2810           single_shifting = 0;
2811           ONE_MORE_BYTE (c);
2812           if (c >= '(' && c <= '/')
2813             {
2814               /* Designation sequence for a charset of dimension 1.  */
2815               ONE_MORE_BYTE (c1);
2816               if (c1 < ' ' || c1 >= 0x80
2817                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2818                 /* Invalid designation sequence.  Just ignore.  */
2819                 break;
2820             }
2821           else if (c == '$')
2822             {
2823               /* Designation sequence for a charset of dimension 2.  */
2824               ONE_MORE_BYTE (c);
2825               if (c >= '@' && c <= 'B')
2826                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2827                 id = iso_charset_table[1][0][c];
2828               else if (c >= '(' && c <= '/')
2829                 {
2830                   ONE_MORE_BYTE (c1);
2831                   if (c1 < ' ' || c1 >= 0x80
2832                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2833                     /* Invalid designation sequence.  Just ignore.  */
2834                     break;
2835                 }
2836               else
2837                 /* Invalid designation sequence.  Just ignore it.  */
2838                 break;
2839             }
2840           else if (c == 'N' || c == 'O')
2841             {
2842               /* ESC <Fe> for SS2 or SS3.  */
2843               single_shifting = 1;
2844               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2845               break;
2846             }
2847           else if (c == '1')
2848             {
2849               /* End of composition.  */
2850               if (composition_count < 0
2851                   || composition_count > MAX_COMPOSITION_COMPONENTS)
2852                 /* Invalid */
2853                 break;
2854               composition_count = -1;
2855               found |= CATEGORY_MASK_ISO;
2856             }
2857           else if (c >= '0' && c <= '4')
2858             {
2859               /* ESC <Fp> for start/end composition.  */
2860               composition_count = 0;
2861               break;
2862             }
2863           else
2864             {
2865               /* Invalid escape sequence.  Just ignore it.  */
2866               break;
2867             }
2868
2869           /* We found a valid designation sequence for CHARSET.  */
2870           rejected |= CATEGORY_MASK_ISO_8BIT;
2871           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2872                               id))
2873             found |= CATEGORY_MASK_ISO_7;
2874           else
2875             rejected |= CATEGORY_MASK_ISO_7;
2876           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2877                               id))
2878             found |= CATEGORY_MASK_ISO_7_TIGHT;
2879           else
2880             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2881           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2882                               id))
2883             found |= CATEGORY_MASK_ISO_7_ELSE;
2884           else
2885             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2886           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2887                               id))
2888             found |= CATEGORY_MASK_ISO_8_ELSE;
2889           else
2890             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2891           break;
2892
2893         case ISO_CODE_SO:
2894         case ISO_CODE_SI:
2895           /* Locking shift out/in.  */
2896           if (inhibit_iso_escape_detection)
2897             break;
2898           single_shifting = 0;
2899           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2900           break;
2901
2902         case ISO_CODE_CSI:
2903           /* Control sequence introducer.  */
2904           single_shifting = 0;
2905           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2906           found |= CATEGORY_MASK_ISO_8_ELSE;
2907           goto check_extra_latin;
2908
2909         case ISO_CODE_SS2:
2910         case ISO_CODE_SS3:
2911           /* Single shift.   */
2912           if (inhibit_iso_escape_detection)
2913             break;
2914           single_shifting = 0;
2915           rejected |= CATEGORY_MASK_ISO_7BIT;
2916           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2917               & CODING_ISO_FLAG_SINGLE_SHIFT)
2918             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2919           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2920               & CODING_ISO_FLAG_SINGLE_SHIFT)
2921             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2922           if (single_shifting)
2923             break;
2924           goto check_extra_latin;
2925
2926         default:
2927           if (c < 0)
2928             continue;
2929           if (c < 0x80)
2930             {
2931               if (composition_count >= 0)
2932                 composition_count++;
2933               single_shifting = 0;
2934               break;
2935             }
2936           if (c >= 0xA0)
2937             {
2938               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2939               found |= CATEGORY_MASK_ISO_8_1;
2940               /* Check the length of succeeding codes of the range
2941                  0xA0..0FF.  If the byte length is even, we include
2942                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2943                  only when we are not single shifting.  */
2944               if (! single_shifting
2945                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2946                 {
2947                   int i = 1;
2948                   while (src < src_end)
2949                     {
2950                       ONE_MORE_BYTE (c);
2951                       if (c < 0xA0)
2952                         break;
2953                       i++;
2954                     }
2955
2956                   if (i & 1 && src < src_end)
2957                     {
2958                       rejected |= CATEGORY_MASK_ISO_8_2;
2959                       if (composition_count >= 0)
2960                         composition_count += i;
2961                     }
2962                   else
2963                     {
2964                       found |= CATEGORY_MASK_ISO_8_2;
2965                       if (composition_count >= 0)
2966                         composition_count += i / 2;
2967                     }
2968                 }
2969               break;
2970             }
2971         check_extra_latin:
2972           single_shifting = 0;
2973           if (! VECTORP (Vlatin_extra_code_table)
2974               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2975             {
2976               rejected = CATEGORY_MASK_ISO;
2977               break;
2978             }
2979           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2980               & CODING_ISO_FLAG_LATIN_EXTRA)
2981             found |= CATEGORY_MASK_ISO_8_1;
2982           else
2983             rejected |= CATEGORY_MASK_ISO_8_1;
2984           rejected |= CATEGORY_MASK_ISO_8_2;
2985         }
2986     }
2987   detect_info->rejected |= CATEGORY_MASK_ISO;
2988   return 0;
2989
2990  no_more_source:
2991   detect_info->rejected |= rejected;
2992   detect_info->found |= (found & ~rejected);
2993   return 1;
2994 }
2995
2996
2997 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
2998    escape sequence should be kept.  */
2999 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3000   do {                                                                  \
3001     int id, prev;                                                       \
3002                                                                         \
3003     if (final < '0' || final >= 128                                     \
3004         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
3005         || !SAFE_CHARSET_P (coding, id))                                \
3006       {                                                                 \
3007         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
3008         chars_96 = -1;                                                  \
3009         break;                                                          \
3010       }                                                                 \
3011     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
3012     if (id == charset_jisx0201_roman)                                   \
3013       {                                                                 \
3014         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
3015           id = charset_ascii;                                           \
3016       }                                                                 \
3017     else if (id == charset_jisx0208_1978)                               \
3018       {                                                                 \
3019         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3020           id = charset_jisx0208;                                        \
3021       }                                                                 \
3022     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3023     /* If there was an invalid designation to REG previously, and this  \
3024        designation is ASCII to REG, we should keep this designation     \
3025        sequence.  */                                                    \
3026     if (prev == -2 && id == charset_ascii)                              \
3027       chars_96 = -1;                                                    \
3028   } while (0)
3029
3030
3031 #define MAYBE_FINISH_COMPOSITION()                              \
3032   do {                                                          \
3033     int i;                                                      \
3034     if (composition_state == COMPOSING_NO)                      \
3035       break;                                                    \
3036     /* It is assured that we have enough room for producing     \
3037        characters stored in the table `components'.  */         \
3038     if (charbuf + component_idx > charbuf_end)                  \
3039       goto no_more_source;                                      \
3040     composition_state = COMPOSING_NO;                           \
3041     if (method == COMPOSITION_RELATIVE                          \
3042         || method == COMPOSITION_WITH_ALTCHARS)                 \
3043       {                                                         \
3044         for (i = 0; i < component_idx; i++)                     \
3045           *charbuf++ = components[i];                           \
3046         char_offset += component_idx;                           \
3047       }                                                         \
3048     else                                                        \
3049       {                                                         \
3050         for (i = 0; i < component_idx; i += 2)                  \
3051           *charbuf++ = components[i];                           \
3052         char_offset += (component_idx / 2) + 1;                 \
3053       }                                                         \
3054   } while (0)
3055
3056
3057 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3058    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3059    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3060    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3061    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3062   */
3063
3064 #define DECODE_COMPOSITION_START(c1)                                    \
3065   do {                                                                  \
3066     if (c1 == '0'                                                       \
3067         && composition_state == COMPOSING_COMPONENT_RULE)               \
3068       {                                                                 \
3069         component_len = component_idx;                                  \
3070         composition_state = COMPOSING_CHAR;                             \
3071       }                                                                 \
3072     else                                                                \
3073       {                                                                 \
3074         const unsigned char *p;                                         \
3075                                                                         \
3076         MAYBE_FINISH_COMPOSITION ();                                    \
3077         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
3078           goto no_more_source;                                          \
3079         for (p = src; p < src_end - 1; p++)                             \
3080           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
3081             break;                                                      \
3082         if (p == src_end - 1)                                           \
3083           {                                                             \
3084             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
3085               goto invalid_code;                                        \
3086             /* The current composition doesn't end in the current       \
3087                source.  */                                              \
3088             record_conversion_result                                    \
3089               (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
3090             goto no_more_source;                                        \
3091           }                                                             \
3092                                                                         \
3093         /* This is surely the start of a composition.  */               \
3094         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
3095                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
3096                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
3097                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
3098         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
3099                              : COMPOSING_COMPONENT_CHAR);               \
3100         component_idx = component_len = 0;                              \
3101       }                                                                 \
3102   } while (0)
3103
3104
3105 /* Handle compositoin end sequence ESC 1.  */
3106
3107 #define DECODE_COMPOSITION_END()                                        \
3108   do {                                                                  \
3109     int nchars = (component_len > 0 ? component_idx - component_len     \
3110                   : method == COMPOSITION_RELATIVE ? component_idx      \
3111                   : (component_idx + 1) / 2);                           \
3112     int i;                                                              \
3113     int *saved_charbuf = charbuf;                                       \
3114                                                                         \
3115     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
3116     if (method != COMPOSITION_RELATIVE)                                 \
3117       {                                                                 \
3118         if (component_len == 0)                                         \
3119           for (i = 0; i < component_idx; i++)                           \
3120             *charbuf++ = components[i];                                 \
3121         else                                                            \
3122           for (i = 0; i < component_len; i++)                           \
3123             *charbuf++ = components[i];                                 \
3124         *saved_charbuf = saved_charbuf - charbuf;                       \
3125       }                                                                 \
3126     if (method == COMPOSITION_WITH_RULE)                                \
3127       for (i = 0; i < component_idx; i += 2, char_offset++)             \
3128         *charbuf++ = components[i];                                     \
3129     else                                                                \
3130       for (i = component_len; i < component_idx; i++, char_offset++)    \
3131         *charbuf++ = components[i];                                     \
3132     coding->annotated = 1;                                              \
3133     composition_state = COMPOSING_NO;                                   \
3134   } while (0)
3135
3136
3137 /* Decode a composition rule from the byte C1 (and maybe one more byte
3138    from SRC) and store one encoded composition rule in
3139    coding->cmp_data.  */
3140
3141 #define DECODE_COMPOSITION_RULE(c1)                                     \
3142   do {                                                                  \
3143     (c1) -= 32;                                                         \
3144     if (c1 < 81)                /* old format (before ver.21) */        \
3145       {                                                                 \
3146         int gref = (c1) / 9;                                            \
3147         int nref = (c1) % 9;                                            \
3148         if (gref == 4) gref = 10;                                       \
3149         if (nref == 4) nref = 10;                                       \
3150         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
3151       }                                                                 \
3152     else if (c1 < 93)           /* new format (after ver.21) */         \
3153       {                                                                 \
3154         ONE_MORE_BYTE (c2);                                             \
3155         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
3156       }                                                                 \
3157     else                                                                \
3158       c1 = 0;                                                           \
3159   } while (0)
3160
3161
3162 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3163
3164 static void
3165 decode_coding_iso_2022 (coding)
3166      struct coding_system *coding;
3167 {
3168   const unsigned char *src = coding->source + coding->consumed;
3169   const unsigned char *src_end = coding->source + coding->src_bytes;
3170   const unsigned char *src_base;
3171   int *charbuf = coding->charbuf + coding->charbuf_used;
3172   int *charbuf_end
3173     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
3174   int consumed_chars = 0, consumed_chars_base;
3175   int multibytep = coding->src_multibyte;
3176   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3177   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3178   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3179   int charset_id_2, charset_id_3;
3180   struct charset *charset;
3181   int c;
3182   /* For handling composition sequence.  */
3183 #define COMPOSING_NO                    0
3184 #define COMPOSING_CHAR                  1
3185 #define COMPOSING_RULE                  2
3186 #define COMPOSING_COMPONENT_CHAR        3
3187 #define COMPOSING_COMPONENT_RULE        4
3188
3189   int composition_state = COMPOSING_NO;
3190   enum composition_method method;
3191   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3192   int component_idx;
3193   int component_len;
3194   Lisp_Object attrs, charset_list;
3195   int char_offset = coding->produced_char;
3196   int last_offset = char_offset;
3197   int last_id = charset_ascii;
3198   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3199   int byte_after_cr = -1;
3200
3201   CODING_GET_INFO (coding, attrs, charset_list);
3202   setup_iso_safe_charsets (attrs);
3203   /* Charset list may have been changed.  */
3204   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3205   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3206
3207   while (1)
3208     {
3209       int c1, c2;
3210
3211       src_base = src;
3212       consumed_chars_base = consumed_chars;
3213
3214       if (charbuf >= charbuf_end)
3215         {
3216           if (byte_after_cr >= 0)
3217             src_base--;
3218           break;
3219         }
3220
3221       if (byte_after_cr >= 0)
3222         c1 = byte_after_cr, byte_after_cr = -1;
3223       else
3224         ONE_MORE_BYTE (c1);
3225       if (c1 < 0)
3226         goto invalid_code;
3227
3228       /* We produce at most one character.  */
3229       switch (iso_code_class [c1])
3230         {
3231         case ISO_0x20_or_0x7F:
3232           if (composition_state != COMPOSING_NO)
3233             {
3234               if (composition_state == COMPOSING_RULE
3235                   || composition_state == COMPOSING_COMPONENT_RULE)
3236                 {
3237                   if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3238                     {
3239                       DECODE_COMPOSITION_RULE (c1);
3240                       components[component_idx++] = c1;
3241                       composition_state--;
3242                       continue;
3243                     }
3244                   /* Too long composition.  */
3245                   MAYBE_FINISH_COMPOSITION ();
3246                 }
3247             }
3248           if (charset_id_0 < 0
3249               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3250             /* This is SPACE or DEL.  */
3251             charset = CHARSET_FROM_ID (charset_ascii);
3252           else
3253             charset = CHARSET_FROM_ID (charset_id_0);
3254           break;
3255
3256         case ISO_graphic_plane_0:
3257           if (composition_state != COMPOSING_NO)
3258             {
3259               if (composition_state == COMPOSING_RULE
3260                   || composition_state == COMPOSING_COMPONENT_RULE)
3261                 {
3262                   if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3263                     {
3264                       DECODE_COMPOSITION_RULE (c1);
3265                       components[component_idx++] = c1;
3266                       composition_state--;
3267                       continue;
3268                     }
3269                   MAYBE_FINISH_COMPOSITION ();
3270                 }
3271             }
3272           if (charset_id_0 < 0)
3273             charset = CHARSET_FROM_ID (charset_ascii);
3274           else
3275             charset = CHARSET_FROM_ID (charset_id_0);
3276           break;
3277
3278         case ISO_0xA0_or_0xFF:
3279           if (charset_id_1 < 0
3280               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3281               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3282             goto invalid_code;
3283           /* This is a graphic character, we fall down ... */
3284
3285         case ISO_graphic_plane_1:
3286           if (charset_id_1 < 0)
3287             goto invalid_code;
3288           charset = CHARSET_FROM_ID (charset_id_1);
3289           break;
3290
3291         case ISO_control_0:
3292           if (eol_crlf && c1 == '\r')
3293             ONE_MORE_BYTE (byte_after_cr);
3294           MAYBE_FINISH_COMPOSITION ();
3295           charset = CHARSET_FROM_ID (charset_ascii);
3296           break;
3297
3298         case ISO_control_1:
3299           MAYBE_FINISH_COMPOSITION ();
3300           goto invalid_code;
3301
3302         case ISO_shift_out:
3303           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3304               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3305             goto invalid_code;
3306           CODING_ISO_INVOCATION (coding, 0) = 1;
3307           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3308           continue;
3309
3310         case ISO_shift_in:
3311           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3312             goto invalid_code;
3313           CODING_ISO_INVOCATION (coding, 0) = 0;
3314           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3315           continue;
3316
3317         case ISO_single_shift_2_7:
3318         case ISO_single_shift_2:
3319           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3320             goto invalid_code;
3321           /* SS2 is handled as an escape sequence of ESC 'N' */
3322           c1 = 'N';
3323           goto label_escape_sequence;
3324
3325         case ISO_single_shift_3:
3326           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3327             goto invalid_code;
3328           /* SS2 is handled as an escape sequence of ESC 'O' */
3329           c1 = 'O';
3330           goto label_escape_sequence;
3331
3332         case ISO_control_sequence_introducer:
3333           /* CSI is handled as an escape sequence of ESC '[' ...  */
3334           c1 = '[';
3335           goto label_escape_sequence;
3336
3337         case ISO_escape:
3338           ONE_MORE_BYTE (c1);
3339         label_escape_sequence:
3340           /* Escape sequences handled here are invocation,
3341              designation, direction specification, and character
3342              composition specification.  */
3343           switch (c1)
3344             {
3345             case '&':           /* revision of following character set */
3346               ONE_MORE_BYTE (c1);
3347               if (!(c1 >= '@' && c1 <= '~'))
3348                 goto invalid_code;
3349               ONE_MORE_BYTE (c1);
3350               if (c1 != ISO_CODE_ESC)
3351                 goto invalid_code;
3352               ONE_MORE_BYTE (c1);
3353               goto label_escape_sequence;
3354
3355             case '$':           /* designation of 2-byte character set */
3356               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3357                 goto invalid_code;
3358               {
3359                 int reg, chars96;
3360
3361                 ONE_MORE_BYTE (c1);
3362                 if (c1 >= '@' && c1 <= 'B')
3363                   {     /* designation of JISX0208.1978, GB2312.1980,
3364                            or JISX0208.1980 */
3365                     reg = 0, chars96 = 0;
3366                   }
3367                 else if (c1 >= 0x28 && c1 <= 0x2B)
3368                   { /* designation of DIMENSION2_CHARS94 character set */
3369                     reg = c1 - 0x28, chars96 = 0;
3370                     ONE_MORE_BYTE (c1);
3371                   }
3372                 else if (c1 >= 0x2C && c1 <= 0x2F)
3373                   { /* designation of DIMENSION2_CHARS96 character set */
3374                     reg = c1 - 0x2C, chars96 = 1;
3375                     ONE_MORE_BYTE (c1);
3376                   }
3377                 else
3378                   goto invalid_code;
3379                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3380                 /* We must update these variables now.  */
3381                 if (reg == 0)
3382                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3383                 else if (reg == 1)
3384                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3385                 if (chars96 < 0)
3386                   goto invalid_code;
3387               }
3388               continue;
3389
3390             case 'n':           /* invocation of locking-shift-2 */
3391               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3392                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3393                 goto invalid_code;
3394               CODING_ISO_INVOCATION (coding, 0) = 2;
3395               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3396               continue;
3397
3398             case 'o':           /* invocation of locking-shift-3 */
3399               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3400                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3401                 goto invalid_code;
3402               CODING_ISO_INVOCATION (coding, 0) = 3;
3403               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3404               continue;
3405
3406             case 'N':           /* invocation of single-shift-2 */
3407               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3408                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3409                 goto invalid_code;
3410               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3411               if (charset_id_2 < 0)
3412                 charset = CHARSET_FROM_ID (charset_ascii);
3413               else
3414                 charset = CHARSET_FROM_ID (charset_id_2);
3415               ONE_MORE_BYTE (c1);
3416               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3417                 goto invalid_code;
3418               break;
3419
3420             case 'O':           /* invocation of single-shift-3 */
3421               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3422                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3423                 goto invalid_code;
3424               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3425               if (charset_id_3 < 0)
3426                 charset = CHARSET_FROM_ID (charset_ascii);
3427               else
3428                 charset = CHARSET_FROM_ID (charset_id_3);
3429               ONE_MORE_BYTE (c1);
3430               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3431                 goto invalid_code;
3432               break;
3433
3434             case '0': case '2': case '3': case '4': /* start composition */
3435               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3436                 goto invalid_code;
3437               DECODE_COMPOSITION_START (c1);
3438               continue;
3439
3440             case '1':           /* end composition */
3441               if (composition_state == COMPOSING_NO)
3442                 goto invalid_code;
3443               DECODE_COMPOSITION_END ();
3444               continue;
3445
3446             case '[':           /* specification of direction */
3447               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3448                 goto invalid_code;
3449               /* For the moment, nested direction is not supported.
3450                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3451                  left-to-right, and nozero means right-to-left.  */
3452               ONE_MORE_BYTE (c1);
3453               switch (c1)
3454                 {
3455                 case ']':       /* end of the current direction */
3456                   coding->mode &= ~CODING_MODE_DIRECTION;
3457
3458                 case '0':       /* end of the current direction */
3459                 case '1':       /* start of left-to-right direction */
3460                   ONE_MORE_BYTE (c1);
3461                   if (c1 == ']')
3462                     coding->mode &= ~CODING_MODE_DIRECTION;
3463                   else
3464                     goto invalid_code;
3465                   break;
3466
3467                 case '2':       /* start of right-to-left direction */
3468                   ONE_MORE_BYTE (c1);
3469                   if (c1 == ']')
3470                     coding->mode |= CODING_MODE_DIRECTION;
3471                   else
3472                     goto invalid_code;
3473                   break;
3474
3475                 default:
3476                   goto invalid_code;
3477                 }
3478               continue;
3479
3480             case '%':
3481               ONE_MORE_BYTE (c1);
3482               if (c1 == '/')
3483                 {
3484                   /* CTEXT extended segment:
3485                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3486                      We keep these bytes as is for the moment.
3487                      They may be decoded by post-read-conversion.  */
3488                   int dim, M, L;
3489                   int size;
3490
3491                   ONE_MORE_BYTE (dim);
3492                   ONE_MORE_BYTE (M);
3493                   ONE_MORE_BYTE (L);
3494                   size = ((M - 128) * 128) + (L - 128);
3495                   if (charbuf + 8 + size > charbuf_end)
3496                     goto break_loop;
3497                   *charbuf++ = ISO_CODE_ESC;
3498                   *charbuf++ = '%';
3499                   *charbuf++ = '/';
3500                   *charbuf++ = dim;
3501                   *charbuf++ = BYTE8_TO_CHAR (M);
3502                   *charbuf++ = BYTE8_TO_CHAR (L);
3503                   while (size-- > 0)
3504                     {
3505                       ONE_MORE_BYTE (c1);
3506                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3507                     }
3508                 }
3509               else if (c1 == 'G')
3510                 {
3511                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3512                      ESC % G --UTF-8-BYTES-- ESC % @
3513                      We keep these bytes as is for the moment.
3514                      They may be decoded by post-read-conversion.  */
3515                   int *p = charbuf;
3516
3517                   if (p + 6 > charbuf_end)
3518                     goto break_loop;
3519                   *p++ = ISO_CODE_ESC;
3520                   *p++ = '%';
3521                   *p++ = 'G';
3522                   while (p < charbuf_end)
3523                     {
3524                       ONE_MORE_BYTE (c1);
3525                       if (c1 == ISO_CODE_ESC
3526                           && src + 1 < src_end
3527                           && src[0] == '%'
3528                           && src[1] == '@')
3529                         {
3530                           src += 2;
3531                           break;
3532                         }
3533                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3534                     }
3535                   if (p + 3 > charbuf_end)
3536                     goto break_loop;
3537                   *p++ = ISO_CODE_ESC;
3538                   *p++ = '%';
3539                   *p++ = '@';
3540                   charbuf = p;
3541                 }
3542               else
3543                 goto invalid_code;
3544               continue;
3545               break;
3546
3547             default:
3548               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3549                 goto invalid_code;
3550               {
3551                 int reg, chars96;
3552
3553                 if (c1 >= 0x28 && c1 <= 0x2B)
3554                   { /* designation of DIMENSION1_CHARS94 character set */
3555                     reg = c1 - 0x28, chars96 = 0;
3556                     ONE_MORE_BYTE (c1);
3557                   }
3558                 else if (c1 >= 0x2C && c1 <= 0x2F)
3559                   { /* designation of DIMENSION1_CHARS96 character set */
3560                     reg = c1 - 0x2C, chars96 = 1;
3561                     ONE_MORE_BYTE (c1);
3562                   }
3563                 else
3564                   goto invalid_code;
3565                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3566                 /* We must update these variables now.  */
3567                 if (reg == 0)
3568                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3569                 else if (reg == 1)
3570                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3571                 if (chars96 < 0)
3572                   goto invalid_code;
3573               }
3574               continue;
3575             }
3576         }
3577
3578       if (charset->id != charset_ascii
3579           && last_id != charset->id)
3580         {
3581           if (last_id != charset_ascii)
3582             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3583           last_id = charset->id;
3584           last_offset = char_offset;
3585         }
3586
3587       /* Now we know CHARSET and 1st position code C1 of a character.
3588          Produce a decoded character while getting 2nd position code
3589          C2 if necessary.  */
3590       c1 &= 0x7F;
3591       if (CHARSET_DIMENSION (charset) > 1)
3592         {
3593           ONE_MORE_BYTE (c2);
3594           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3595             /* C2 is not in a valid range.  */
3596             goto invalid_code;
3597           c1 = (c1 << 8) | (c2 & 0x7F);
3598           if (CHARSET_DIMENSION (charset) > 2)
3599             {
3600               ONE_MORE_BYTE (c2);
3601               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3602                 /* C2 is not in a valid range.  */
3603                 goto invalid_code;
3604               c1 = (c1 << 8) | (c2 & 0x7F);
3605             }
3606         }
3607
3608       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3609       if (c < 0)
3610         {
3611           MAYBE_FINISH_COMPOSITION ();
3612           for (; src_base < src; src_base++, char_offset++)
3613             {
3614               if (ASCII_BYTE_P (*src_base))
3615                 *charbuf++ = *src_base;
3616               else
3617                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3618             }
3619         }
3620       else if (composition_state == COMPOSING_NO)
3621         {
3622           *charbuf++ = c;
3623           char_offset++;
3624         }
3625       else
3626         {
3627           if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3628             {
3629               components[component_idx++] = c;
3630               if (method == COMPOSITION_WITH_RULE
3631                   || (method == COMPOSITION_WITH_RULE_ALTCHARS
3632                       && composition_state == COMPOSING_COMPONENT_CHAR))
3633                 composition_state++;
3634             }
3635           else
3636             {
3637               MAYBE_FINISH_COMPOSITION ();
3638               *charbuf++ = c;
3639               char_offset++;
3640             }
3641         }
3642       continue;
3643
3644     invalid_code:
3645       MAYBE_FINISH_COMPOSITION ();
3646       src = src_base;
3647       consumed_chars = consumed_chars_base;
3648       ONE_MORE_BYTE (c);
3649       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3650       char_offset++;
3651       coding->errors++;
3652       continue;
3653
3654     break_loop:
3655       break;
3656     }
3657
3658  no_more_source:
3659   if (last_id != charset_ascii)
3660     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3661   coding->consumed_char += consumed_chars_base;
3662   coding->consumed = src_base - coding->source;
3663   coding->charbuf_used = charbuf - coding->charbuf;
3664 }
3665
3666
3667 /* ISO2022 encoding stuff.  */
3668
3669 /*
3670    It is not enough to say just "ISO2022" on encoding, we have to
3671    specify more details.  In Emacs, each coding system of ISO2022
3672    variant has the following specifications:
3673         1. Initial designation to G0 thru G3.
3674         2. Allows short-form designation?
3675         3. ASCII should be designated to G0 before control characters?
3676         4. ASCII should be designated to G0 at end of line?
3677         5. 7-bit environment or 8-bit environment?
3678         6. Use locking-shift?
3679         7. Use Single-shift?
3680    And the following two are only for Japanese:
3681         8. Use ASCII in place of JIS0201-1976-Roman?
3682         9. Use JISX0208-1983 in place of JISX0208-1978?
3683    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3684    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3685    details.
3686 */
3687
3688 /* Produce codes (escape sequence) for designating CHARSET to graphic
3689    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3690    '@', 'A', or 'B' and the coding system CODING allows, produce
3691    designation sequence of short-form.  */
3692
3693 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3694   do {                                                                  \
3695     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3696     char *intermediate_char_94 = "()*+";                                \
3697     char *intermediate_char_96 = ",-./";                                \
3698     int revision = -1;                                                  \
3699     int c;                                                              \
3700                                                                         \
3701     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3702       revision = CHARSET_ISO_REVISION (charset);                        \
3703                                                                         \
3704     if (revision >= 0)                                                  \
3705       {                                                                 \
3706         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3707         EMIT_ONE_BYTE ('@' + revision);                                 \
3708       }                                                                 \
3709     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3710     if (CHARSET_DIMENSION (charset) == 1)                               \
3711       {                                                                 \
3712         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3713           c = intermediate_char_94[reg];                                \
3714         else                                                            \
3715           c = intermediate_char_96[reg];                                \
3716         EMIT_ONE_ASCII_BYTE (c);                                        \
3717       }                                                                 \
3718     else                                                                \
3719       {                                                                 \
3720         EMIT_ONE_ASCII_BYTE ('$');                                      \
3721         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3722           {                                                             \
3723             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3724                 || reg != 0                                             \
3725                 || final_char < '@' || final_char > 'B')                \
3726               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3727           }                                                             \
3728         else                                                            \
3729           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3730       }                                                                 \
3731     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3732                                                                         \
3733     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3734   } while (0)
3735
3736
3737 /* The following two macros produce codes (control character or escape
3738    sequence) for ISO2022 single-shift functions (single-shift-2 and
3739    single-shift-3).  */
3740
3741 #define ENCODE_SINGLE_SHIFT_2                                           \
3742   do {                                                                  \
3743     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3744       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3745     else                                                                \
3746       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3747     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3748   } while (0)
3749
3750
3751 #define ENCODE_SINGLE_SHIFT_3                                           \
3752   do {                                                                  \
3753     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3754       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3755     else                                                                \
3756       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3757     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3758   } while (0)
3759
3760
3761 /* The following four macros produce codes (control character or
3762    escape sequence) for ISO2022 locking-shift functions (shift-in,
3763    shift-out, locking-shift-2, and locking-shift-3).  */
3764
3765 #define ENCODE_SHIFT_IN                                 \
3766   do {                                                  \
3767     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3768     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3769   } while (0)
3770
3771
3772 #define ENCODE_SHIFT_OUT                                \
3773   do {                                                  \
3774     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3775     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3776   } while (0)
3777
3778
3779 #define ENCODE_LOCKING_SHIFT_2                          \
3780   do {                                                  \
3781     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3782     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3783   } while (0)
3784
3785
3786 #define ENCODE_LOCKING_SHIFT_3                          \
3787   do {                                                  \
3788     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3789     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3790   } while (0)
3791
3792
3793 /* Produce codes for a DIMENSION1 character whose character set is
3794    CHARSET and whose position-code is C1.  Designation and invocation
3795    sequences are also produced in advance if necessary.  */
3796
3797 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3798   do {                                                                  \
3799     int id = CHARSET_ID (charset);                                      \
3800                                                                         \
3801     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3802         && id == charset_ascii)                                         \
3803       {                                                                 \
3804         id = charset_jisx0201_roman;                                    \
3805         charset = CHARSET_FROM_ID (id);                                 \
3806       }                                                                 \
3807                                                                         \
3808     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3809       {                                                                 \
3810         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3811           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3812         else                                                            \
3813           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3814         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3815         break;                                                          \
3816       }                                                                 \
3817     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3818       {                                                                 \
3819         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3820         break;                                                          \
3821       }                                                                 \
3822     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3823       {                                                                 \
3824         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3825         break;                                                          \
3826       }                                                                 \
3827     else                                                                \
3828       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3829          must invoke it, or, at first, designate it to some graphic     \
3830          register.  Then repeat the loop to actually produce the        \
3831          character.  */                                                 \
3832       dst = encode_invocation_designation (charset, coding, dst,        \
3833                                            &produced_chars);            \
3834   } while (1)
3835
3836
3837 /* Produce codes for a DIMENSION2 character whose character set is
3838    CHARSET and whose position-codes are C1 and C2.  Designation and
3839    invocation codes are also produced in advance if necessary.  */
3840
3841 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3842   do {                                                                  \
3843     int id = CHARSET_ID (charset);                                      \
3844                                                                         \
3845     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3846         && id == charset_jisx0208)                                      \
3847       {                                                                 \
3848         id = charset_jisx0208_1978;                                     \
3849         charset = CHARSET_FROM_ID (id);                                 \
3850       }                                                                 \
3851                                                                         \
3852     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3853       {                                                                 \
3854         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3855           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3856         else                                                            \
3857           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3858         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3859         break;                                                          \
3860       }                                                                 \
3861     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3862       {                                                                 \
3863         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3864         break;                                                          \
3865       }                                                                 \
3866     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3867       {                                                                 \
3868         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3869         break;                                                          \
3870       }                                                                 \
3871     else                                                                \
3872       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3873          must invoke it, or, at first, designate it to some graphic     \
3874          register.  Then repeat the loop to actually produce the        \
3875          character.  */                                                 \
3876       dst = encode_invocation_designation (charset, coding, dst,        \
3877                                            &produced_chars);            \
3878   } while (1)
3879
3880
3881 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3882   do {                                                                     \
3883     int code = ENCODE_CHAR ((charset),(c));                                \
3884                                                                            \
3885     if (CHARSET_DIMENSION (charset) == 1)                                  \
3886       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3887     else                                                                   \
3888       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3889   } while (0)
3890
3891
3892 /* Produce designation and invocation codes at a place pointed by DST
3893    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3894    Return new DST.  */
3895
3896 unsigned char *
3897 encode_invocation_designation (charset, coding, dst, p_nchars)
3898      struct charset *charset;
3899      struct coding_system *coding;
3900      unsigned char *dst;
3901      int *p_nchars;
3902 {
3903   int multibytep = coding->dst_multibyte;
3904   int produced_chars = *p_nchars;
3905   int reg;                      /* graphic register number */
3906   int id = CHARSET_ID (charset);
3907
3908   /* At first, check designations.  */
3909   for (reg = 0; reg < 4; reg++)
3910     if (id == CODING_ISO_DESIGNATION (coding, reg))
3911       break;
3912
3913   if (reg >= 4)
3914     {
3915       /* CHARSET is not yet designated to any graphic registers.  */
3916       /* At first check the requested designation.  */
3917       reg = CODING_ISO_REQUEST (coding, id);
3918       if (reg < 0)
3919         /* Since CHARSET requests no special designation, designate it
3920            to graphic register 0.  */
3921         reg = 0;
3922
3923       ENCODE_DESIGNATION (charset, reg, coding);
3924     }
3925
3926   if (CODING_ISO_INVOCATION (coding, 0) != reg
3927       && CODING_ISO_INVOCATION (coding, 1) != reg)
3928     {
3929       /* Since the graphic register REG is not invoked to any graphic
3930          planes, invoke it to graphic plane 0.  */
3931       switch (reg)
3932         {
3933         case 0:                 /* graphic register 0 */
3934           ENCODE_SHIFT_IN;
3935           break;
3936
3937         case 1:                 /* graphic register 1 */
3938           ENCODE_SHIFT_OUT;
3939           break;
3940
3941         case 2:                 /* graphic register 2 */
3942           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3943             ENCODE_SINGLE_SHIFT_2;
3944           else
3945             ENCODE_LOCKING_SHIFT_2;
3946           break;
3947
3948         case 3:                 /* graphic register 3 */
3949           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3950             ENCODE_SINGLE_SHIFT_3;
3951           else
3952             ENCODE_LOCKING_SHIFT_3;
3953           break;
3954         }
3955     }
3956
3957   *p_nchars = produced_chars;
3958   return dst;
3959 }
3960
3961 /* The following three macros produce codes for indicating direction
3962    of text.  */
3963 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3964   do {                                                                  \
3965     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3966       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3967     else                                                                \
3968       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3969   } while (0)
3970
3971
3972 #define ENCODE_DIRECTION_R2L()                  \
3973   do {                                          \
3974     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3975     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3976   } while (0)
3977
3978
3979 #define ENCODE_DIRECTION_L2R()                  \
3980   do {                                          \
3981     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3982     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3983   } while (0)
3984
3985
3986 /* Produce codes for designation and invocation to reset the graphic
3987    planes and registers to initial state.  */
3988 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3989   do {                                                                  \
3990     int reg;                                                            \
3991     struct charset *charset;                                            \
3992                                                                         \
3993     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3994       ENCODE_SHIFT_IN;                                                  \
3995     for (reg = 0; reg < 4; reg++)                                       \
3996       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3997           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3998               != CODING_ISO_INITIAL (coding, reg)))                     \
3999         {                                                               \
4000           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
4001           ENCODE_DESIGNATION (charset, reg, coding);                    \
4002         }                                                               \
4003   } while (0)
4004
4005
4006 /* Produce designation sequences of charsets in the line started from
4007    SRC to a place pointed by DST, and return updated DST.
4008
4009    If the current block ends before any end-of-line, we may fail to
4010    find all the necessary designations.  */
4011
4012 static unsigned char *
4013 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
4014      struct coding_system *coding;
4015      int *charbuf, *charbuf_end;
4016      unsigned char *dst;
4017 {
4018   struct charset *charset;
4019   /* Table of charsets to be designated to each graphic register.  */
4020   int r[4];
4021   int c, found = 0, reg;
4022   int produced_chars = 0;
4023   int multibytep = coding->dst_multibyte;
4024   Lisp_Object attrs;
4025   Lisp_Object charset_list;
4026
4027   attrs = CODING_ID_ATTRS (coding->id);
4028   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4029   if (EQ (charset_list, Qiso_2022))
4030     charset_list = Viso_2022_charset_list;
4031
4032   for (reg = 0; reg < 4; reg++)
4033     r[reg] = -1;
4034
4035   while (found < 4)
4036     {
4037       int id;
4038
4039       c = *charbuf++;
4040       if (c == '\n')
4041         break;
4042       charset = char_charset (c, charset_list, NULL);
4043       id = CHARSET_ID (charset);
4044       reg = CODING_ISO_REQUEST (coding, id);
4045       if (reg >= 0 && r[reg] < 0)
4046         {
4047           found++;
4048           r[reg] = id;
4049         }
4050     }
4051
4052   if (found)
4053     {
4054       for (reg = 0; reg < 4; reg++)
4055         if (r[reg] >= 0
4056             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4057           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4058     }
4059
4060   return dst;
4061 }
4062
4063 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
4064
4065 static int
4066 encode_coding_iso_2022 (coding)
4067      struct coding_system *coding;
4068 {
4069   int multibytep = coding->dst_multibyte;
4070   int *charbuf = coding->charbuf;
4071   int *charbuf_end = charbuf + coding->charbuf_used;
4072   unsigned char *dst = coding->destination + coding->produced;
4073   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4074   int safe_room = 16;
4075   int bol_designation
4076     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4077        && CODING_ISO_BOL (coding));
4078   int produced_chars = 0;
4079   Lisp_Object attrs, eol_type, charset_list;
4080   int ascii_compatible;
4081   int c;
4082   int preferred_charset_id = -1;
4083
4084   CODING_GET_INFO (coding, attrs, charset_list);
4085   eol_type = CODING_ID_EOL_TYPE (coding->id);
4086   if (VECTORP (eol_type))
4087     eol_type = Qunix;
4088
4089   setup_iso_safe_charsets (attrs);
4090   /* Charset list may have been changed.  */
4091   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4092   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
4093
4094   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4095
4096   while (charbuf < charbuf_end)
4097     {
4098       ASSURE_DESTINATION (safe_room);
4099
4100       if (bol_designation)
4101         {
4102           unsigned char *dst_prev = dst;
4103
4104           /* We have to produce designation sequences if any now.  */
4105           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4106           bol_designation = 0;
4107           /* We are sure that designation sequences are all ASCII bytes.  */
4108           produced_chars += dst - dst_prev;
4109         }
4110
4111       c = *charbuf++;
4112
4113       if (c < 0)
4114         {
4115           /* Handle an annotation.  */
4116           switch (*charbuf)
4117             {
4118             case CODING_ANNOTATE_COMPOSITION_MASK:
4119               /* Not yet implemented.  */
4120               break;
4121             case CODING_ANNOTATE_CHARSET_MASK:
4122               preferred_charset_id = charbuf[2];
4123               if (preferred_charset_id >= 0
4124                   && NILP (Fmemq (make_number (preferred_charset_id),
4125                                   charset_list)))
4126                 preferred_charset_id = -1;
4127               break;
4128             default:
4129               abort ();
4130             }
4131           charbuf += -c - 1;
4132           continue;
4133         }
4134
4135       /* Now encode the character C.  */
4136       if (c < 0x20 || c == 0x7F)
4137         {
4138           if (c == '\n'
4139               || (c == '\r' && EQ (eol_type, Qmac)))
4140             {
4141               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4142                 ENCODE_RESET_PLANE_AND_REGISTER ();
4143               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4144                 {
4145                   int i;
4146
4147                   for (i = 0; i < 4; i++)
4148                     CODING_ISO_DESIGNATION (coding, i)
4149                       = CODING_ISO_INITIAL (coding, i);
4150                 }
4151               bol_designation
4152                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4153             }
4154           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4155             ENCODE_RESET_PLANE_AND_REGISTER ();
4156           EMIT_ONE_ASCII_BYTE (c);
4157         }
4158       else if (ASCII_CHAR_P (c))
4159         {
4160           if (ascii_compatible)
4161             EMIT_ONE_ASCII_BYTE (c);
4162           else
4163             {
4164               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4165               ENCODE_ISO_CHARACTER (charset, c);
4166             }
4167         }
4168       else if (CHAR_BYTE8_P (c))
4169         {
4170           c = CHAR_TO_BYTE8 (c);
4171           EMIT_ONE_BYTE (c);
4172         }
4173       else
4174         {
4175           struct charset *charset;
4176
4177           if (preferred_charset_id >= 0)
4178             {
4179               charset = CHARSET_FROM_ID (preferred_charset_id);
4180               if (! CHAR_CHARSET_P (c, charset))
4181                 charset = char_charset (c, charset_list, NULL);
4182             }
4183           else
4184             charset = char_charset (c, charset_list, NULL);
4185           if (!charset)
4186             {
4187               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4188                 {
4189                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4190                   charset = CHARSET_FROM_ID (charset_ascii);
4191                 }
4192               else
4193                 {
4194                   c = coding->default_char;
4195                   charset = char_charset (c, charset_list, NULL);
4196                 }
4197             }
4198           ENCODE_ISO_CHARACTER (charset, c);
4199         }
4200     }
4201
4202   if (coding->mode & CODING_MODE_LAST_BLOCK
4203       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4204     {
4205       ASSURE_DESTINATION (safe_room);
4206       ENCODE_RESET_PLANE_AND_REGISTER ();
4207     }
4208   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4209   CODING_ISO_BOL (coding) = bol_designation;
4210   coding->produced_char += produced_chars;
4211   coding->produced = dst - coding->destination;
4212   return 0;
4213 }
4214
4215 \f
4216 /*** 8,9. SJIS and BIG5 handlers ***/
4217
4218 /* Although SJIS and BIG5 are not ISO's coding system, they are used
4219    quite widely.  So, for the moment, Emacs supports them in the bare
4220    C code.  But, in the future, they may be supported only by CCL.  */
4221
4222 /* SJIS is a coding system encoding three character sets: ASCII, right
4223    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4224    as is.  A character of charset katakana-jisx0201 is encoded by
4225    "position-code + 0x80".  A character of charset japanese-jisx0208
4226    is encoded in 2-byte but two position-codes are divided and shifted
4227    so that it fit in the range below.
4228
4229    --- CODE RANGE of SJIS ---
4230    (character set)      (range)
4231    ASCII                0x00 .. 0x7F
4232    KATAKANA-JISX0201    0xA0 .. 0xDF
4233    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4234             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4235    -------------------------------
4236
4237 */
4238
4239 /* BIG5 is a coding system encoding two character sets: ASCII and
4240    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4241    character set and is encoded in two-byte.
4242
4243    --- CODE RANGE of BIG5 ---
4244    (character set)      (range)
4245    ASCII                0x00 .. 0x7F
4246    Big5 (1st byte)      0xA1 .. 0xFE
4247         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4248    --------------------------
4249
4250   */
4251
4252 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4253    Check if a text is encoded in SJIS.  If it is, return
4254    CATEGORY_MASK_SJIS, else return 0.  */
4255
4256 static int
4257 detect_coding_sjis (coding, detect_info)
4258      struct coding_system *coding;
4259      struct coding_detection_info *detect_info;
4260 {
4261   const unsigned char *src = coding->source, *src_base;
4262   const unsigned char *src_end = coding->source + coding->src_bytes;
4263   int multibytep = coding->src_multibyte;
4264   int consumed_chars = 0;
4265   int found = 0;
4266   int c;
4267
4268   detect_info->checked |= CATEGORY_MASK_SJIS;
4269   /* A coding system of this category is always ASCII compatible.  */
4270   src += coding->head_ascii;
4271
4272   while (1)
4273     {
4274       src_base = src;
4275       ONE_MORE_BYTE (c);
4276       if (c < 0x80)
4277         continue;
4278       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4279         {
4280           ONE_MORE_BYTE (c);
4281           if (c < 0x40 || c == 0x7F || c > 0xFC)
4282             break;
4283           found = CATEGORY_MASK_SJIS;
4284         }
4285       else if (c >= 0xA0 && c < 0xE0)
4286         found = CATEGORY_MASK_SJIS;
4287       else
4288         break;
4289     }
4290   detect_info->rejected |= CATEGORY_MASK_SJIS;
4291   return 0;
4292
4293  no_more_source:
4294   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4295     {
4296       detect_info->rejected |= CATEGORY_MASK_SJIS;
4297       return 0;
4298     }
4299   detect_info->found |= found;
4300   return 1;
4301 }
4302
4303 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4304    Check if a text is encoded in BIG5.  If it is, return
4305    CATEGORY_MASK_BIG5, else return 0.  */
4306
4307 static int
4308 detect_coding_big5 (coding, detect_info)
4309      struct coding_system *coding;
4310      struct coding_detection_info *detect_info;
4311 {
4312   const unsigned char *src = coding->source, *src_base;
4313   const unsigned char *src_end = coding->source + coding->src_bytes;
4314   int multibytep = coding->src_multibyte;
4315   int consumed_chars = 0;
4316   int found = 0;
4317   int c;
4318
4319   detect_info->checked |= CATEGORY_MASK_BIG5;
4320   /* A coding system of this category is always ASCII compatible.  */
4321   src += coding->head_ascii;
4322
4323   while (1)
4324     {
4325       src_base = src;
4326       ONE_MORE_BYTE (c);
4327       if (c < 0x80)
4328         continue;
4329       if (c >= 0xA1)
4330         {
4331           ONE_MORE_BYTE (c);
4332           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4333             return 0;
4334           found = CATEGORY_MASK_BIG5;
4335         }
4336       else
4337         break;
4338     }
4339   detect_info->rejected |= CATEGORY_MASK_BIG5;
4340   return 0;
4341
4342  no_more_source:
4343   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4344     {
4345       detect_info->rejected |= CATEGORY_MASK_BIG5;
4346       return 0;
4347     }
4348   detect_info->found |= found;
4349   return 1;
4350 }
4351
4352 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4353    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4354
4355 static void
4356 decode_coding_sjis (coding)
4357      struct coding_system *coding;
4358 {
4359   const unsigned char *src = coding->source + coding->consumed;
4360   const unsigned char *src_end = coding->source + coding->src_bytes;
4361   const unsigned char *src_base;
4362   int *charbuf = coding->charbuf + coding->charbuf_used;
4363   int *charbuf_end
4364     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4365   int consumed_chars = 0, consumed_chars_base;
4366   int multibytep = coding->src_multibyte;
4367   struct charset *charset_roman, *charset_kanji, *charset_kana;
4368   struct charset *charset_kanji2;
4369   Lisp_Object attrs, charset_list, val;
4370   int char_offset = coding->produced_char;
4371   int last_offset = char_offset;
4372   int last_id = charset_ascii;
4373   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4374   int byte_after_cr = -1;
4375
4376   CODING_GET_INFO (coding, attrs, charset_list);
4377
4378   val = charset_list;
4379   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4380   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4381   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4382   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4383
4384   while (1)
4385     {
4386       int c, c1;
4387       struct charset *charset;
4388
4389       src_base = src;
4390       consumed_chars_base = consumed_chars;
4391
4392       if (charbuf >= charbuf_end)
4393         {
4394           if (byte_after_cr >= 0)
4395             src_base--;
4396           break;
4397         }
4398
4399       if (byte_after_cr >= 0)
4400         c = byte_after_cr, byte_after_cr = -1;
4401       else
4402         ONE_MORE_BYTE (c);
4403       if (c < 0)
4404         goto invalid_code;
4405       if (c < 0x80)
4406         {
4407           if (eol_crlf && c == '\r')
4408             ONE_MORE_BYTE (byte_after_cr);
4409           charset = charset_roman;
4410         }
4411       else if (c == 0x80 || c == 0xA0)
4412         goto invalid_code;
4413       else if (c >= 0xA1 && c <= 0xDF)
4414         {
4415           /* SJIS -> JISX0201-Kana */
4416           c &= 0x7F;
4417           charset = charset_kana;
4418         }
4419       else if (c <= 0xEF)
4420         {
4421           /* SJIS -> JISX0208 */
4422           ONE_MORE_BYTE (c1);
4423           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4424             goto invalid_code;
4425           c = (c << 8) | c1;
4426           SJIS_TO_JIS (c);
4427           charset = charset_kanji;
4428         }
4429       else if (c <= 0xFC && charset_kanji2)
4430         {
4431           /* SJIS -> JISX0213-2 */
4432           ONE_MORE_BYTE (c1);
4433           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4434             goto invalid_code;
4435           c = (c << 8) | c1;
4436           SJIS_TO_JIS2 (c);
4437           charset = charset_kanji2;
4438         }
4439       else
4440         goto invalid_code;
4441       if (charset->id != charset_ascii
4442           && last_id != charset->id)
4443         {
4444           if (last_id != charset_ascii)
4445             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4446           last_id = charset->id;
4447           last_offset = char_offset;
4448         }
4449       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4450       *charbuf++ = c;
4451       char_offset++;
4452       continue;
4453
4454     invalid_code:
4455       src = src_base;
4456       consumed_chars = consumed_chars_base;
4457       ONE_MORE_BYTE (c);
4458       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4459       char_offset++;
4460       coding->errors++;
4461     }
4462
4463  no_more_source:
4464   if (last_id != charset_ascii)
4465     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4466   coding->consumed_char += consumed_chars_base;
4467   coding->consumed = src_base - coding->source;
4468   coding->charbuf_used = charbuf - coding->charbuf;
4469 }
4470
4471 static void
4472 decode_coding_big5 (coding)
4473      struct coding_system *coding;
4474 {
4475   const unsigned char *src = coding->source + coding->consumed;
4476   const unsigned char *src_end = coding->source + coding->src_bytes;
4477   const unsigned char *src_base;
4478   int *charbuf = coding->charbuf + coding->charbuf_used;
4479   int *charbuf_end
4480     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4481   int consumed_chars = 0, consumed_chars_base;
4482   int multibytep = coding->src_multibyte;
4483   struct charset *charset_roman, *charset_big5;
4484   Lisp_Object attrs, charset_list, val;
4485   int char_offset = coding->produced_char;
4486   int last_offset = char_offset;
4487   int last_id = charset_ascii;
4488   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4489   int byte_after_cr = -1;
4490
4491   CODING_GET_INFO (coding, attrs, charset_list);
4492   val = charset_list;
4493   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4494   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4495
4496   while (1)
4497     {
4498       int c, c1;
4499       struct charset *charset;
4500
4501       src_base = src;
4502       consumed_chars_base = consumed_chars;
4503
4504       if (charbuf >= charbuf_end)
4505         {
4506           if (byte_after_cr >= 0)
4507             src_base--;
4508           break;
4509         }
4510
4511       if (byte_after_cr >= 0)
4512         c = byte_after_cr, byte_after_cr = -1;
4513       else
4514         ONE_MORE_BYTE (c);
4515
4516       if (c < 0)
4517         goto invalid_code;
4518       if (c < 0x80)
4519         {
4520           if (eol_crlf && c == '\r')
4521             ONE_MORE_BYTE (byte_after_cr);
4522           charset = charset_roman;
4523         }
4524       else
4525         {
4526           /* BIG5 -> Big5 */
4527           if (c < 0xA1 || c > 0xFE)
4528             goto invalid_code;
4529           ONE_MORE_BYTE (c1);
4530           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4531             goto invalid_code;
4532           c = c << 8 | c1;
4533           charset = charset_big5;
4534         }
4535       if (charset->id != charset_ascii
4536           && last_id != charset->id)
4537         {
4538           if (last_id != charset_ascii)
4539             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4540           last_id = charset->id;
4541           last_offset = char_offset;
4542         }
4543       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4544       *charbuf++ = c;
4545       char_offset++;
4546       continue;
4547
4548     invalid_code:
4549       src = src_base;
4550       consumed_chars = consumed_chars_base;
4551       ONE_MORE_BYTE (c);
4552       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4553       char_offset++;
4554       coding->errors++;
4555     }
4556
4557  no_more_source:
4558   if (last_id != charset_ascii)
4559     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4560   coding->consumed_char += consumed_chars_base;
4561   coding->consumed = src_base - coding->source;
4562   coding->charbuf_used = charbuf - coding->charbuf;
4563 }
4564
4565 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4566    This function can encode charsets `ascii', `katakana-jisx0201',
4567    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4568    are sure that all these charsets are registered as official charset
4569    (i.e. do not have extended leading-codes).  Characters of other
4570    charsets are produced without any encoding.  If SJIS_P is 1, encode
4571    SJIS text, else encode BIG5 text.  */
4572
4573 static int
4574 encode_coding_sjis (coding)
4575      struct coding_system *coding;
4576 {
4577   int multibytep = coding->dst_multibyte;
4578   int *charbuf = coding->charbuf;
4579   int *charbuf_end = charbuf + coding->charbuf_used;
4580   unsigned char *dst = coding->destination + coding->produced;
4581   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4582   int safe_room = 4;
4583   int produced_chars = 0;
4584   Lisp_Object attrs, charset_list, val;
4585   int ascii_compatible;
4586   struct charset *charset_roman, *charset_kanji, *charset_kana;
4587   struct charset *charset_kanji2;
4588   int c;
4589
4590   CODING_GET_INFO (coding, attrs, charset_list);
4591   val = charset_list;
4592   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4593   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4594   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4595   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4596
4597   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4598
4599   while (charbuf < charbuf_end)
4600     {
4601       ASSURE_DESTINATION (safe_room);
4602       c = *charbuf++;
4603       /* Now encode the character C.  */
4604       if (ASCII_CHAR_P (c) && ascii_compatible)
4605         EMIT_ONE_ASCII_BYTE (c);
4606       else if (CHAR_BYTE8_P (c))
4607         {
4608           c = CHAR_TO_BYTE8 (c);
4609           EMIT_ONE_BYTE (c);
4610         }
4611       else
4612         {
4613           unsigned code;
4614           struct charset *charset = char_charset (c, charset_list, &code);
4615
4616           if (!charset)
4617             {
4618               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4619                 {
4620                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4621                   charset = CHARSET_FROM_ID (charset_ascii);
4622                 }
4623               else
4624                 {
4625                   c = coding->default_char;
4626                   charset = char_charset (c, charset_list, &code);
4627                 }
4628             }
4629           if (code == CHARSET_INVALID_CODE (charset))
4630             abort ();
4631           if (charset == charset_kanji)
4632             {
4633               int c1, c2;
4634               JIS_TO_SJIS (code);
4635               c1 = code >> 8, c2 = code & 0xFF;
4636               EMIT_TWO_BYTES (c1, c2);
4637             }
4638           else if (charset == charset_kana)
4639             EMIT_ONE_BYTE (code | 0x80);
4640           else if (charset_kanji2 && charset == charset_kanji2)
4641             {
4642               int c1, c2;
4643
4644               c1 = code >> 8;
4645               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4646                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4647                 {
4648                   JIS_TO_SJIS2 (code);
4649                   c1 = code >> 8, c2 = code & 0xFF;
4650                   EMIT_TWO_BYTES (c1, c2);
4651                 }
4652               else
4653                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4654             }
4655           else
4656             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4657         }
4658     }
4659   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4660   coding->produced_char += produced_chars;
4661   coding->produced = dst - coding->destination;
4662   return 0;
4663 }
4664
4665 static int
4666 encode_coding_big5 (coding)
4667      struct coding_system *coding;
4668 {
4669   int multibytep = coding->dst_multibyte;
4670   int *charbuf = coding->charbuf;
4671   int *charbuf_end = charbuf + coding->charbuf_used;
4672   unsigned char *dst = coding->destination + coding->produced;
4673   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4674   int safe_room = 4;
4675   int produced_chars = 0;
4676   Lisp_Object attrs, charset_list, val;
4677   int ascii_compatible;
4678   struct charset *charset_roman, *charset_big5;
4679   int c;
4680
4681   CODING_GET_INFO (coding, attrs, charset_list);
4682   val = charset_list;
4683   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4684   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4685   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4686
4687   while (charbuf < charbuf_end)
4688     {
4689       ASSURE_DESTINATION (safe_room);
4690       c = *charbuf++;
4691       /* Now encode the character C.  */
4692       if (ASCII_CHAR_P (c) && ascii_compatible)
4693         EMIT_ONE_ASCII_BYTE (c);
4694       else if (CHAR_BYTE8_P (c))
4695         {
4696           c = CHAR_TO_BYTE8 (c);
4697           EMIT_ONE_BYTE (c);
4698         }
4699       else
4700         {
4701           unsigned code;
4702           struct charset *charset = char_charset (c, charset_list, &code);
4703
4704           if (! charset)
4705             {
4706               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4707                 {
4708                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4709                   charset = CHARSET_FROM_ID (charset_ascii);
4710                 }
4711               else
4712                 {
4713                   c = coding->default_char;
4714                   charset = char_charset (c, charset_list, &code);
4715                 }
4716             }
4717           if (code == CHARSET_INVALID_CODE (charset))
4718             abort ();
4719           if (charset == charset_big5)
4720             {
4721               int c1, c2;
4722
4723               c1 = code >> 8, c2 = code & 0xFF;
4724               EMIT_TWO_BYTES (c1, c2);
4725             }
4726           else
4727             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4728         }
4729     }
4730   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4731   coding->produced_char += produced_chars;
4732   coding->produced = dst - coding->destination;
4733   return 0;
4734 }
4735
4736 \f
4737 /*** 10. CCL handlers ***/
4738
4739 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4740    Check if a text is encoded in a coding system of which
4741    encoder/decoder are written in CCL program.  If it is, return
4742    CATEGORY_MASK_CCL, else return 0.  */
4743
4744 static int
4745 detect_coding_ccl (coding, detect_info)
4746      struct coding_system *coding;
4747      struct coding_detection_info *detect_info;
4748 {
4749   const unsigned char *src = coding->source, *src_base;
4750   const unsigned char *src_end = coding->source + coding->src_bytes;
4751   int multibytep = coding->src_multibyte;
4752   int consumed_chars = 0;
4753   int found = 0;
4754   unsigned char *valids;
4755   int head_ascii = coding->head_ascii;
4756   Lisp_Object attrs;
4757
4758   detect_info->checked |= CATEGORY_MASK_CCL;
4759
4760   coding = &coding_categories[coding_category_ccl];
4761   valids = CODING_CCL_VALIDS (coding);
4762   attrs = CODING_ID_ATTRS (coding->id);
4763   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4764     src += head_ascii;
4765
4766   while (1)
4767     {
4768       int c;
4769
4770       src_base = src;
4771       ONE_MORE_BYTE (c);
4772       if (c < 0 || ! valids[c])
4773         break;
4774       if ((valids[c] > 1))
4775         found = CATEGORY_MASK_CCL;
4776     }
4777   detect_info->rejected |= CATEGORY_MASK_CCL;
4778   return 0;
4779
4780  no_more_source:
4781   detect_info->found |= found;
4782   return 1;
4783 }
4784
4785 static void
4786 decode_coding_ccl (coding)
4787      struct coding_system *coding;
4788 {
4789   const unsigned char *src = coding->source + coding->consumed;
4790   const unsigned char *src_end = coding->source + coding->src_bytes;
4791   int *charbuf = coding->charbuf + coding->charbuf_used;
4792   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4793   int consumed_chars = 0;
4794   int multibytep = coding->src_multibyte;
4795   struct ccl_program ccl;
4796   int source_charbuf[1024];
4797   int source_byteidx[1024];
4798   Lisp_Object attrs, charset_list;
4799
4800   CODING_GET_INFO (coding, attrs, charset_list);
4801   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4802
4803   while (src < src_end)
4804     {
4805       const unsigned char *p = src;
4806       int *source, *source_end;
4807       int i = 0;
4808
4809       if (multibytep)
4810         while (i < 1024 && p < src_end)
4811           {
4812             source_byteidx[i] = p - src;
4813             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4814           }
4815       else
4816         while (i < 1024 && p < src_end)
4817           source_charbuf[i++] = *p++;
4818
4819       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4820         ccl.last_block = 1;
4821
4822       source = source_charbuf;
4823       source_end = source + i;
4824       while (source < source_end)
4825         {
4826           ccl_driver (&ccl, source, charbuf,
4827                       source_end - source, charbuf_end - charbuf,
4828                       charset_list);
4829           source += ccl.consumed;
4830           charbuf += ccl.produced;
4831           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4832             break;
4833         }
4834       if (source < source_end)
4835         src += source_byteidx[source - source_charbuf];
4836       else
4837         src = p;
4838       consumed_chars += source - source_charbuf;
4839
4840       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4841           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4842         break;
4843     }
4844
4845   switch (ccl.status)
4846     {
4847     case CCL_STAT_SUSPEND_BY_SRC:
4848       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4849       break;
4850     case CCL_STAT_SUSPEND_BY_DST:
4851       break;
4852     case CCL_STAT_QUIT:
4853     case CCL_STAT_INVALID_CMD:
4854       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4855       break;
4856     default:
4857       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4858       break;
4859     }
4860   coding->consumed_char += consumed_chars;
4861   coding->consumed = src - coding->source;
4862   coding->charbuf_used = charbuf - coding->charbuf;
4863 }
4864
4865 static int
4866 encode_coding_ccl (coding)
4867      struct coding_system *coding;
4868 {
4869   struct ccl_program ccl;
4870   int multibytep = coding->dst_multibyte;
4871   int *charbuf = coding->charbuf;
4872   int *charbuf_end = charbuf + coding->charbuf_used;
4873   unsigned char *dst = coding->destination + coding->produced;
4874   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4875   int destination_charbuf[1024];
4876   int i, produced_chars = 0;
4877   Lisp_Object attrs, charset_list;
4878
4879   CODING_GET_INFO (coding, attrs, charset_list);
4880   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4881
4882   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4883   ccl.dst_multibyte = coding->dst_multibyte;
4884
4885   while (charbuf < charbuf_end)
4886     {
4887       ccl_driver (&ccl, charbuf, destination_charbuf,
4888                   charbuf_end - charbuf, 1024, charset_list);
4889       if (multibytep)
4890         {
4891           ASSURE_DESTINATION (ccl.produced * 2);
4892           for (i = 0; i < ccl.produced; i++)
4893             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4894         }
4895       else
4896         {
4897           ASSURE_DESTINATION (ccl.produced);
4898           for (i = 0; i < ccl.produced; i++)
4899             *dst++ = destination_charbuf[i] & 0xFF;
4900           produced_chars += ccl.produced;
4901         }
4902       charbuf += ccl.consumed;
4903       if (ccl.status == CCL_STAT_QUIT
4904           || ccl.status == CCL_STAT_INVALID_CMD)
4905         break;
4906     }
4907
4908   switch (ccl.status)
4909     {
4910     case CCL_STAT_SUSPEND_BY_SRC:
4911       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4912       break;
4913     case CCL_STAT_SUSPEND_BY_DST:
4914       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4915       break;
4916     case CCL_STAT_QUIT:
4917     case CCL_STAT_INVALID_CMD:
4918       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4919       break;
4920     default:
4921       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4922       break;
4923     }
4924
4925   coding->produced_char += produced_chars;
4926   coding->produced = dst - coding->destination;
4927   return 0;
4928 }
4929
4930
4931 \f
4932 /*** 10, 11. no-conversion handlers ***/
4933
4934 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4935
4936 static void
4937 decode_coding_raw_text (coding)
4938      struct coding_system *coding;
4939 {
4940   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4941
4942   coding->chars_at_source = 1;
4943   coding->consumed_char = coding->src_chars;
4944   coding->consumed = coding->src_bytes;
4945   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4946     {
4947       coding->consumed_char--;
4948       coding->consumed--;
4949       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4950     }
4951   else
4952     record_conversion_result (coding, CODING_RESULT_SUCCESS);
4953 }
4954
4955 static int
4956 encode_coding_raw_text (coding)
4957      struct coding_system *coding;
4958 {
4959   int multibytep = coding->dst_multibyte;
4960   int *charbuf = coding->charbuf;
4961   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4962   unsigned char *dst = coding->destination + coding->produced;
4963   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4964   int produced_chars = 0;
4965   int c;
4966
4967   if (multibytep)
4968     {
4969       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4970
4971       if (coding->src_multibyte)
4972         while (charbuf < charbuf_end)
4973           {
4974             ASSURE_DESTINATION (safe_room);
4975             c = *charbuf++;
4976             if (ASCII_CHAR_P (c))
4977               EMIT_ONE_ASCII_BYTE (c);
4978             else if (CHAR_BYTE8_P (c))
4979               {
4980                 c = CHAR_TO_BYTE8 (c);
4981                 EMIT_ONE_BYTE (c);
4982               }
4983             else
4984               {
4985                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4986
4987                 CHAR_STRING_ADVANCE (c, p1);
4988                 while (p0 < p1)
4989                   {
4990                     EMIT_ONE_BYTE (*p0);
4991                     p0++;
4992                   }
4993               }
4994           }
4995       else
4996         while (charbuf < charbuf_end)
4997           {
4998             ASSURE_DESTINATION (safe_room);
4999             c = *charbuf++;
5000             EMIT_ONE_BYTE (c);
5001           }
5002     }
5003   else
5004     {
5005       if (coding->src_multibyte)
5006         {
5007           int safe_room = MAX_MULTIBYTE_LENGTH;
5008
5009           while (charbuf < charbuf_end)
5010             {
5011               ASSURE_DESTINATION (safe_room);
5012               c = *charbuf++;
5013               if (ASCII_CHAR_P (c))
5014                 *dst++ = c;
5015               else if (CHAR_BYTE8_P (c))
5016                 *dst++ = CHAR_TO_BYTE8 (c);
5017               else
5018                 CHAR_STRING_ADVANCE (c, dst);
5019             }
5020         }
5021       else
5022         {
5023           ASSURE_DESTINATION (charbuf_end - charbuf);
5024           while (charbuf < charbuf_end && dst < dst_end)
5025             *dst++ = *charbuf++;
5026         }
5027       produced_chars = dst - (coding->destination + coding->produced);
5028     }
5029   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5030   coding->produced_char += produced_chars;
5031   coding->produced = dst - coding->destination;
5032   return 0;
5033 }
5034
5035 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5036    Check if a text is encoded in a charset-based coding system.  If it
5037    is, return 1, else return 0.  */
5038
5039 static int
5040 detect_coding_charset (coding, detect_info)
5041      struct coding_system *coding;
5042      struct coding_detection_info *detect_info;
5043 {
5044   const unsigned char *src = coding->source, *src_base;
5045   const unsigned char *src_end = coding->source + coding->src_bytes;
5046   int multibytep = coding->src_multibyte;
5047   int consumed_chars = 0;
5048   Lisp_Object attrs, valids, name;
5049   int found = 0;
5050   int head_ascii = coding->head_ascii;
5051   int check_latin_extra = 0;
5052
5053   detect_info->checked |= CATEGORY_MASK_CHARSET;
5054
5055   coding = &coding_categories[coding_category_charset];
5056   attrs = CODING_ID_ATTRS (coding->id);
5057   valids = AREF (attrs, coding_attr_charset_valids);
5058   name = CODING_ID_NAME (coding->id);
5059   if (VECTORP (Vlatin_extra_code_table)
5060       && strcmp ((char *) SDATA (SYMBOL_NAME (name)), "iso-8859-"))
5061     check_latin_extra = 1;
5062   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5063     src += head_ascii;
5064
5065   while (1)
5066     {
5067       int c;
5068       Lisp_Object val;
5069       struct charset *charset;
5070       int dim, idx;
5071
5072       src_base = src;
5073       ONE_MORE_BYTE (c);
5074       if (c < 0)
5075         continue;
5076       val = AREF (valids, c);
5077       if (NILP (val))
5078         break;
5079       if (c >= 0x80)
5080         {
5081           if (c < 0xA0
5082               && check_latin_extra
5083               && NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
5084             break;
5085           found = CATEGORY_MASK_CHARSET;
5086         }
5087       if (INTEGERP (val))
5088         {
5089           charset = CHARSET_FROM_ID (XFASTINT (val));
5090           dim = CHARSET_DIMENSION (charset);
5091           for (idx = 1; idx < dim; idx++)
5092             {
5093               if (src == src_end)
5094                 goto too_short;
5095               ONE_MORE_BYTE (c);
5096               if (c < charset->code_space[(dim - 1 - idx) * 2]
5097                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5098                 break;
5099             }
5100           if (idx < dim)
5101             break;
5102         }
5103       else
5104         {
5105           idx = 1;
5106           for (; CONSP (val); val = XCDR (val))
5107             {
5108               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5109               dim = CHARSET_DIMENSION (charset);
5110               while (idx < dim)
5111                 {
5112                   if (src == src_end)
5113                     goto too_short;
5114                   ONE_MORE_BYTE (c);
5115                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5116                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5117                     break;
5118                   idx++;
5119                 }
5120               if (idx == dim)
5121                 {
5122                   val = Qnil;
5123                   break;
5124                 }
5125             }
5126           if (CONSP (val))
5127             break;
5128         }
5129     }
5130  too_short:
5131   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5132   return 0;
5133
5134  no_more_source:
5135   detect_info->found |= found;
5136   return 1;
5137 }
5138
5139 static void
5140 decode_coding_charset (coding)
5141      struct coding_system *coding;
5142 {
5143   const unsigned char *src = coding->source + coding->consumed;
5144   const unsigned char *src_end = coding->source + coding->src_bytes;
5145   const unsigned char *src_base;
5146   int *charbuf = coding->charbuf + coding->charbuf_used;
5147   int *charbuf_end
5148     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
5149   int consumed_chars = 0, consumed_chars_base;
5150   int multibytep = coding->src_multibyte;
5151   Lisp_Object attrs, charset_list, valids;
5152   int char_offset = coding->produced_char;
5153   int last_offset = char_offset;
5154   int last_id = charset_ascii;
5155   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5156   int byte_after_cr = -1;
5157
5158   CODING_GET_INFO (coding, attrs, charset_list);
5159   valids = AREF (attrs, coding_attr_charset_valids);
5160
5161   while (1)
5162     {
5163       int c;
5164       Lisp_Object val;
5165       struct charset *charset;
5166       int dim;
5167       int len = 1;
5168       unsigned code;
5169
5170       src_base = src;
5171       consumed_chars_base = consumed_chars;
5172
5173       if (charbuf >= charbuf_end)
5174         {
5175           if (byte_after_cr >= 0)
5176             src_base--;
5177           break;
5178         }
5179
5180       if (byte_after_cr >= 0)
5181         {
5182           c = byte_after_cr;
5183           byte_after_cr = -1;
5184         }
5185       else
5186         {
5187           ONE_MORE_BYTE (c);
5188           if (eol_crlf && c == '\r')
5189             ONE_MORE_BYTE (byte_after_cr);
5190         }
5191       if (c < 0)
5192         goto invalid_code;
5193       code = c;
5194
5195       val = AREF (valids, c);
5196       if (! INTEGERP (val) && ! CONSP (val))
5197         goto invalid_code;
5198       if (INTEGERP (val))
5199         {
5200           charset = CHARSET_FROM_ID (XFASTINT (val));
5201           dim = CHARSET_DIMENSION (charset);
5202           while (len < dim)
5203             {
5204               ONE_MORE_BYTE (c);
5205               code = (code << 8) | c;
5206               len++;
5207             }
5208           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5209                               charset, code, c);
5210         }
5211       else
5212         {
5213           /* VAL is a list of charset IDs.  It is assured that the
5214              list is sorted by charset dimensions (smaller one
5215              comes first).  */
5216           while (CONSP (val))
5217             {
5218               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5219               dim = CHARSET_DIMENSION (charset);
5220               while (len < dim)
5221                 {
5222                   ONE_MORE_BYTE (c);
5223                   code = (code << 8) | c;
5224                   len++;
5225                 }
5226               CODING_DECODE_CHAR (coding, src, src_base,
5227                                   src_end, charset, code, c);
5228               if (c >= 0)
5229                 break;
5230               val = XCDR (val);
5231             }
5232         }
5233       if (c < 0)
5234         goto invalid_code;
5235       if (charset->id != charset_ascii
5236           && last_id != charset->id)
5237         {
5238           if (last_id != charset_ascii)
5239             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5240           last_id = charset->id;
5241           last_offset = char_offset;
5242         }
5243
5244       *charbuf++ = c;
5245       char_offset++;
5246       continue;
5247
5248     invalid_code:
5249       src = src_base;
5250       consumed_chars = consumed_chars_base;
5251       ONE_MORE_BYTE (c);
5252       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5253       char_offset++;
5254       coding->errors++;
5255     }
5256
5257  no_more_source:
5258   if (last_id != charset_ascii)
5259     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5260   coding->consumed_char += consumed_chars_base;
5261   coding->consumed = src_base - coding->source;
5262   coding->charbuf_used = charbuf - coding->charbuf;
5263 }
5264
5265 static int
5266 encode_coding_charset (coding)
5267      struct coding_system *coding;
5268 {
5269   int multibytep = coding->dst_multibyte;
5270   int *charbuf = coding->charbuf;
5271   int *charbuf_end = charbuf + coding->charbuf_used;
5272   unsigned char *dst = coding->destination + coding->produced;
5273   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5274   int safe_room = MAX_MULTIBYTE_LENGTH;
5275   int produced_chars = 0;
5276   Lisp_Object attrs, charset_list;
5277   int ascii_compatible;
5278   int c;
5279
5280   CODING_GET_INFO (coding, attrs, charset_list);
5281   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5282
5283   while (charbuf < charbuf_end)
5284     {
5285       struct charset *charset;
5286       unsigned code;
5287
5288       ASSURE_DESTINATION (safe_room);
5289       c = *charbuf++;
5290       if (ascii_compatible && ASCII_CHAR_P (c))
5291         EMIT_ONE_ASCII_BYTE (c);
5292       else if (CHAR_BYTE8_P (c))
5293         {
5294           c = CHAR_TO_BYTE8 (c);
5295           EMIT_ONE_BYTE (c);
5296         }
5297       else
5298         {
5299           charset = char_charset (c, charset_list, &code);
5300           if (charset)
5301             {
5302               if (CHARSET_DIMENSION (charset) == 1)
5303                 EMIT_ONE_BYTE (code);
5304               else if (CHARSET_DIMENSION (charset) == 2)
5305                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5306               else if (CHARSET_DIMENSION (charset) == 3)
5307                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5308               else
5309                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5310                                  (code >> 8) & 0xFF, code & 0xFF);
5311             }
5312           else
5313             {
5314               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5315                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5316               else
5317                 c = coding->default_char;
5318               EMIT_ONE_BYTE (c);
5319             }
5320         }
5321     }
5322
5323   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5324   coding->produced_char += produced_chars;
5325   coding->produced = dst - coding->destination;
5326   return 0;
5327 }
5328
5329 \f
5330 /*** 7. C library functions ***/
5331
5332 /* Setup coding context CODING from information about CODING_SYSTEM.
5333    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5334    CODING_SYSTEM is invalid, signal an error.  */
5335
5336 void
5337 setup_coding_system (coding_system, coding)
5338      Lisp_Object coding_system;
5339      struct coding_system *coding;
5340 {
5341   Lisp_Object attrs;
5342   Lisp_Object eol_type;
5343   Lisp_Object coding_type;
5344   Lisp_Object val;
5345
5346   if (NILP (coding_system))
5347     coding_system = Qundecided;
5348
5349   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5350
5351   attrs = CODING_ID_ATTRS (coding->id);
5352   eol_type = CODING_ID_EOL_TYPE (coding->id);
5353
5354   coding->mode = 0;
5355   coding->head_ascii = -1;
5356   if (VECTORP (eol_type))
5357     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5358                             | CODING_REQUIRE_DETECTION_MASK);
5359   else if (! EQ (eol_type, Qunix))
5360     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5361                             | CODING_REQUIRE_ENCODING_MASK);
5362   else
5363     coding->common_flags = 0;
5364   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5365     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5366   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5367     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5368   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5369     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5370
5371   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5372   coding->max_charset_id = SCHARS (val) - 1;
5373   coding->safe_charsets = (char *) SDATA (val);
5374   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5375
5376   coding_type = CODING_ATTR_TYPE (attrs);
5377   if (EQ (coding_type, Qundecided))
5378     {
5379       coding->detector = NULL;
5380       coding->decoder = decode_coding_raw_text;
5381       coding->encoder = encode_coding_raw_text;
5382       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5383     }
5384   else if (EQ (coding_type, Qiso_2022))
5385     {
5386       int i;
5387       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5388
5389       /* Invoke graphic register 0 to plane 0.  */
5390       CODING_ISO_INVOCATION (coding, 0) = 0;
5391       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5392       CODING_ISO_INVOCATION (coding, 1)
5393         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5394       /* Setup the initial status of designation.  */
5395       for (i = 0; i < 4; i++)
5396         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5397       /* Not single shifting initially.  */
5398       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5399       /* Beginning of buffer should also be regarded as bol. */
5400       CODING_ISO_BOL (coding) = 1;
5401       coding->detector = detect_coding_iso_2022;
5402       coding->decoder = decode_coding_iso_2022;
5403       coding->encoder = encode_coding_iso_2022;
5404       if (flags & CODING_ISO_FLAG_SAFE)
5405         coding->mode |= CODING_MODE_SAFE_ENCODING;
5406       coding->common_flags
5407         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5408             | CODING_REQUIRE_FLUSHING_MASK);
5409       if (flags & CODING_ISO_FLAG_COMPOSITION)
5410         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5411       if (flags & CODING_ISO_FLAG_DESIGNATION)
5412         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5413       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5414         {
5415           setup_iso_safe_charsets (attrs);
5416           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5417           coding->max_charset_id = SCHARS (val) - 1;
5418           coding->safe_charsets = (char *) SDATA (val);
5419         }
5420       CODING_ISO_FLAGS (coding) = flags;
5421     }
5422   else if (EQ (coding_type, Qcharset))
5423     {
5424       coding->detector = detect_coding_charset;
5425       coding->decoder = decode_coding_charset;
5426       coding->encoder = encode_coding_charset;
5427       coding->common_flags
5428         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5429     }
5430   else if (EQ (coding_type, Qutf_8))
5431     {
5432       val = AREF (attrs, coding_attr_utf_bom);
5433       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5434                                    : EQ (val, Qt) ? utf_with_bom
5435                                    : utf_without_bom);
5436       coding->detector = detect_coding_utf_8;
5437       coding->decoder = decode_coding_utf_8;
5438       coding->encoder = encode_coding_utf_8;
5439       coding->common_flags
5440         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5441       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5442         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5443     }
5444   else if (EQ (coding_type, Qutf_16))
5445     {
5446       val = AREF (attrs, coding_attr_utf_bom);
5447       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5448                                     : EQ (val, Qt) ? utf_with_bom
5449                                     : utf_without_bom);
5450       val = AREF (attrs, coding_attr_utf_16_endian);
5451       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5452                                        : utf_16_little_endian);
5453       CODING_UTF_16_SURROGATE (coding) = 0;
5454       coding->detector = detect_coding_utf_16;
5455       coding->decoder = decode_coding_utf_16;
5456       coding->encoder = encode_coding_utf_16;
5457       coding->common_flags
5458         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5459       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5460         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5461     }
5462   else if (EQ (coding_type, Qccl))
5463     {
5464       coding->detector = detect_coding_ccl;
5465       coding->decoder = decode_coding_ccl;
5466       coding->encoder = encode_coding_ccl;
5467       coding->common_flags
5468         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5469             | CODING_REQUIRE_FLUSHING_MASK);
5470     }
5471   else if (EQ (coding_type, Qemacs_mule))
5472     {
5473       coding->detector = detect_coding_emacs_mule;
5474       coding->decoder = decode_coding_emacs_mule;
5475       coding->encoder = encode_coding_emacs_mule;
5476       coding->common_flags
5477         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5478       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5479           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5480         {
5481           Lisp_Object tail, safe_charsets;
5482           int max_charset_id = 0;
5483
5484           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5485                tail = XCDR (tail))
5486             if (max_charset_id < XFASTINT (XCAR (tail)))
5487               max_charset_id = XFASTINT (XCAR (tail));
5488           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5489                                         make_number (255));
5490           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5491                tail = XCDR (tail))
5492             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5493           coding->max_charset_id = max_charset_id;
5494           coding->safe_charsets = (char *) SDATA (safe_charsets);
5495         }
5496     }
5497   else if (EQ (coding_type, Qshift_jis))
5498     {
5499       coding->detector = detect_coding_sjis;
5500       coding->decoder = decode_coding_sjis;
5501       coding->encoder = encode_coding_sjis;
5502       coding->common_flags
5503         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5504     }
5505   else if (EQ (coding_type, Qbig5))
5506     {
5507       coding->detector = detect_coding_big5;
5508       coding->decoder = decode_coding_big5;
5509       coding->encoder = encode_coding_big5;
5510       coding->common_flags
5511         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5512     }
5513   else                          /* EQ (coding_type, Qraw_text) */
5514     {
5515       coding->detector = NULL;
5516       coding->decoder = decode_coding_raw_text;
5517       coding->encoder = encode_coding_raw_text;
5518       if (! EQ (eol_type, Qunix))
5519         {
5520           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5521           if (! VECTORP (eol_type))
5522             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5523         }
5524
5525     }
5526
5527   return;
5528 }
5529
5530 /* Return a list of charsets supported by CODING.  */
5531
5532 Lisp_Object
5533 coding_charset_list (coding)
5534      struct coding_system *coding;
5535 {
5536   Lisp_Object attrs, charset_list;
5537
5538   CODING_GET_INFO (coding, attrs, charset_list);
5539   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5540     {
5541       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5542
5543       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5544         charset_list = Viso_2022_charset_list;
5545     }
5546   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5547     {
5548       charset_list = Vemacs_mule_charset_list;
5549     }
5550   return charset_list;
5551 }
5552
5553
5554 /* Return raw-text or one of its subsidiaries that has the same
5555    eol_type as CODING-SYSTEM.  */
5556
5557 Lisp_Object
5558 raw_text_coding_system (coding_system)
5559      Lisp_Object coding_system;
5560 {
5561   Lisp_Object spec, attrs;
5562   Lisp_Object eol_type, raw_text_eol_type;
5563
5564   if (NILP (coding_system))
5565     return Qraw_text;
5566   spec = CODING_SYSTEM_SPEC (coding_system);
5567   attrs = AREF (spec, 0);
5568
5569   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5570     return coding_system;
5571
5572   eol_type = AREF (spec, 2);
5573   if (VECTORP (eol_type))
5574     return Qraw_text;
5575   spec = CODING_SYSTEM_SPEC (Qraw_text);
5576   raw_text_eol_type = AREF (spec, 2);
5577   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5578           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5579           : AREF (raw_text_eol_type, 2));
5580 }
5581
5582
5583 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5584    does, return one of the subsidiary that has the same eol-spec as
5585    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
5586    inherit end-of-line format from the system's setting
5587    (system_eol_type).  */
5588
5589 Lisp_Object
5590 coding_inherit_eol_type (coding_system, parent)
5591      Lisp_Object coding_system, parent;
5592 {
5593   Lisp_Object spec, eol_type;
5594
5595   if (NILP (coding_system))
5596     coding_system = Qraw_text;
5597   spec = CODING_SYSTEM_SPEC (coding_system);
5598   eol_type = AREF (spec, 2);
5599   if (VECTORP (eol_type))
5600     {
5601       Lisp_Object parent_eol_type;
5602
5603       if (! NILP (parent))
5604         {
5605           Lisp_Object parent_spec;
5606
5607           parent_spec = CODING_SYSTEM_SPEC (parent);
5608           parent_eol_type = AREF (parent_spec, 2);
5609         }
5610       else
5611         parent_eol_type = system_eol_type;
5612       if (EQ (parent_eol_type, Qunix))
5613         coding_system = AREF (eol_type, 0);
5614       else if (EQ (parent_eol_type, Qdos))
5615         coding_system = AREF (eol_type, 1);
5616       else if (EQ (parent_eol_type, Qmac))
5617         coding_system = AREF (eol_type, 2);
5618     }
5619   return coding_system;
5620 }
5621
5622 /* Emacs has a mechanism to automatically detect a coding system if it
5623    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5624    it's impossible to distinguish some coding systems accurately
5625    because they use the same range of codes.  So, at first, coding
5626    systems are categorized into 7, those are:
5627
5628    o coding-category-emacs-mule
5629
5630         The category for a coding system which has the same code range
5631         as Emacs' internal format.  Assigned the coding-system (Lisp
5632         symbol) `emacs-mule' by default.
5633
5634    o coding-category-sjis
5635
5636         The category for a coding system which has the same code range
5637         as SJIS.  Assigned the coding-system (Lisp
5638         symbol) `japanese-shift-jis' by default.
5639
5640    o coding-category-iso-7
5641
5642         The category for a coding system which has the same code range
5643         as ISO2022 of 7-bit environment.  This doesn't use any locking
5644         shift and single shift functions.  This can encode/decode all
5645         charsets.  Assigned the coding-system (Lisp symbol)
5646         `iso-2022-7bit' by default.
5647
5648    o coding-category-iso-7-tight
5649
5650         Same as coding-category-iso-7 except that this can
5651         encode/decode only the specified charsets.
5652
5653    o coding-category-iso-8-1
5654
5655         The category for a coding system which has the same code range
5656         as ISO2022 of 8-bit environment and graphic plane 1 used only
5657         for DIMENSION1 charset.  This doesn't use any locking shift
5658         and single shift functions.  Assigned the coding-system (Lisp
5659         symbol) `iso-latin-1' by default.
5660
5661    o coding-category-iso-8-2
5662
5663         The category for a coding system which has the same code range
5664         as ISO2022 of 8-bit environment and graphic plane 1 used only
5665         for DIMENSION2 charset.  This doesn't use any locking shift
5666         and single shift functions.  Assigned the coding-system (Lisp
5667         symbol) `japanese-iso-8bit' by default.
5668
5669    o coding-category-iso-7-else
5670
5671         The category for a coding system which has the same code range
5672         as ISO2022 of 7-bit environemnt but uses locking shift or
5673         single shift functions.  Assigned the coding-system (Lisp
5674         symbol) `iso-2022-7bit-lock' by default.
5675
5676    o coding-category-iso-8-else
5677
5678         The category for a coding system which has the same code range
5679         as ISO2022 of 8-bit environemnt but uses locking shift or
5680         single shift functions.  Assigned the coding-system (Lisp
5681         symbol) `iso-2022-8bit-ss2' by default.
5682
5683    o coding-category-big5
5684
5685         The category for a coding system which has the same code range
5686         as BIG5.  Assigned the coding-system (Lisp symbol)
5687         `cn-big5' by default.
5688
5689    o coding-category-utf-8
5690
5691         The category for a coding system which has the same code range
5692         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5693         symbol) `utf-8' by default.
5694
5695    o coding-category-utf-16-be
5696
5697         The category for a coding system in which a text has an
5698         Unicode signature (cf. Unicode Standard) in the order of BIG
5699         endian at the head.  Assigned the coding-system (Lisp symbol)
5700         `utf-16-be' by default.
5701
5702    o coding-category-utf-16-le
5703
5704         The category for a coding system in which a text has an
5705         Unicode signature (cf. Unicode Standard) in the order of
5706         LITTLE endian at the head.  Assigned the coding-system (Lisp
5707         symbol) `utf-16-le' by default.
5708
5709    o coding-category-ccl
5710
5711         The category for a coding system of which encoder/decoder is
5712         written in CCL programs.  The default value is nil, i.e., no
5713         coding system is assigned.
5714
5715    o coding-category-binary
5716
5717         The category for a coding system not categorized in any of the
5718         above.  Assigned the coding-system (Lisp symbol)
5719         `no-conversion' by default.
5720
5721    Each of them is a Lisp symbol and the value is an actual
5722    `coding-system's (this is also a Lisp symbol) assigned by a user.
5723    What Emacs does actually is to detect a category of coding system.
5724    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5725    decide only one possible category, it selects a category of the
5726    highest priority.  Priorities of categories are also specified by a
5727    user in a Lisp variable `coding-category-list'.
5728
5729 */
5730
5731 #define EOL_SEEN_NONE   0
5732 #define EOL_SEEN_LF     1
5733 #define EOL_SEEN_CR     2
5734 #define EOL_SEEN_CRLF   4
5735
5736 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5737    SOURCE is encoded.  If CATEGORY is one of
5738    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5739    two-byte, else they are encoded by one-byte.
5740
5741    Return one of EOL_SEEN_XXX.  */
5742
5743 #define MAX_EOL_CHECK_COUNT 3
5744
5745 static int
5746 detect_eol (source, src_bytes, category)
5747      const unsigned char *source;
5748      EMACS_INT src_bytes;
5749      enum coding_category category;
5750 {
5751   const unsigned char *src = source, *src_end = src + src_bytes;
5752   unsigned char c;
5753   int total  = 0;
5754   int eol_seen = EOL_SEEN_NONE;
5755
5756   if ((1 << category) & CATEGORY_MASK_UTF_16)
5757     {
5758       int msb, lsb;
5759
5760       msb = category == (coding_category_utf_16_le
5761                          | coding_category_utf_16_le_nosig);
5762       lsb = 1 - msb;
5763
5764       while (src + 1 < src_end)
5765         {
5766           c = src[lsb];
5767           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5768             {
5769               int this_eol;
5770
5771               if (c == '\n')
5772                 this_eol = EOL_SEEN_LF;
5773               else if (src + 3 >= src_end
5774                        || src[msb + 2] != 0
5775                        || src[lsb + 2] != '\n')
5776                 this_eol = EOL_SEEN_CR;
5777               else
5778                 this_eol = EOL_SEEN_CRLF;
5779
5780               if (eol_seen == EOL_SEEN_NONE)
5781                 /* This is the first end-of-line.  */
5782                 eol_seen = this_eol;
5783               else if (eol_seen != this_eol)
5784                 {
5785                   /* The found type is different from what found before.  */
5786                   eol_seen = EOL_SEEN_LF;
5787                   break;
5788                 }
5789               if (++total == MAX_EOL_CHECK_COUNT)
5790                 break;
5791             }
5792           src += 2;
5793         }
5794     }
5795   else
5796     {
5797       while (src < src_end)
5798         {
5799           c = *src++;
5800           if (c == '\n' || c == '\r')
5801             {
5802               int this_eol;
5803
5804               if (c == '\n')
5805                 this_eol = EOL_SEEN_LF;
5806               else if (src >= src_end || *src != '\n')
5807                 this_eol = EOL_SEEN_CR;
5808               else
5809                 this_eol = EOL_SEEN_CRLF, src++;
5810
5811               if (eol_seen == EOL_SEEN_NONE)
5812                 /* This is the first end-of-line.  */
5813                 eol_seen = this_eol;
5814               else if (eol_seen != this_eol)
5815                 {
5816                   /* The found type is different from what found before.  */
5817                   eol_seen = EOL_SEEN_LF;
5818                   break;
5819                 }
5820               if (++total == MAX_EOL_CHECK_COUNT)
5821                 break;
5822             }
5823         }
5824     }
5825   return eol_seen;
5826 }
5827
5828
5829 static Lisp_Object
5830 adjust_coding_eol_type (coding, eol_seen)
5831      struct coding_system *coding;
5832      int eol_seen;
5833 {
5834   Lisp_Object eol_type;
5835
5836   eol_type = CODING_ID_EOL_TYPE (coding->id);
5837   if (eol_seen & EOL_SEEN_LF)
5838     {
5839       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5840       eol_type = Qunix;
5841     }
5842   else if (eol_seen & EOL_SEEN_CRLF)
5843     {
5844       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5845       eol_type = Qdos;
5846     }
5847   else if (eol_seen & EOL_SEEN_CR)
5848     {
5849       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5850       eol_type = Qmac;
5851     }
5852   return eol_type;
5853 }
5854
5855 /* Detect how a text specified in CODING is encoded.  If a coding
5856    system is detected, update fields of CODING by the detected coding
5857    system.  */
5858
5859 void
5860 detect_coding (coding)
5861      struct coding_system *coding;
5862 {
5863   const unsigned char *src, *src_end;
5864
5865   coding->consumed = coding->consumed_char = 0;
5866   coding->produced = coding->produced_char = 0;
5867   coding_set_source (coding);
5868
5869   src_end = coding->source + coding->src_bytes;
5870   coding->head_ascii = 0;
5871
5872   /* If we have not yet decided the text encoding type, detect it
5873      now.  */
5874   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5875     {
5876       int c, i;
5877       struct coding_detection_info detect_info;
5878       int null_byte_found = 0, eight_bit_found = 0;
5879
5880       detect_info.checked = detect_info.found = detect_info.rejected = 0;
5881       for (src = coding->source; src < src_end; src++)
5882         {
5883           c = *src;
5884           if (c & 0x80)
5885             {
5886               eight_bit_found = 1;
5887               if (null_byte_found)
5888                 break;
5889             }
5890           else if (c < 0x20)
5891             {
5892               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5893                   && ! inhibit_iso_escape_detection
5894                   && ! detect_info.checked)
5895                 {
5896                   if (detect_coding_iso_2022 (coding, &detect_info))
5897                     {
5898                       /* We have scanned the whole data.  */
5899                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5900                         {
5901                           /* We didn't find an 8-bit code.  We may
5902                              have found a null-byte, but it's very
5903                              rare that a binary file confirm to
5904                              ISO-2022.  */
5905                           src = src_end;
5906                           coding->head_ascii = src - coding->source;
5907                         }
5908                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
5909                       break;
5910                     }
5911                 }
5912               else if (! c && !inhibit_null_byte_detection)
5913                 {
5914                   null_byte_found = 1;
5915                   if (eight_bit_found)
5916                     break;
5917                 }
5918               if (! eight_bit_found)
5919                 coding->head_ascii++;
5920             }
5921           else if (! eight_bit_found)
5922             coding->head_ascii++;
5923         }
5924
5925       if (null_byte_found || eight_bit_found
5926           || coding->head_ascii < coding->src_bytes
5927           || detect_info.found)
5928         {
5929           enum coding_category category;
5930           struct coding_system *this;
5931
5932           if (coding->head_ascii == coding->src_bytes)
5933             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
5934             for (i = 0; i < coding_category_raw_text; i++)
5935               {
5936                 category = coding_priorities[i];
5937                 this = coding_categories + category;
5938                 if (detect_info.found & (1 << category))
5939                   break;
5940               }
5941           else
5942             {
5943               if (null_byte_found)
5944                 {
5945                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
5946                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
5947                 }
5948               for (i = 0; i < coding_category_raw_text; i++)
5949                 {
5950                   category = coding_priorities[i];
5951                   this = coding_categories + category;
5952                   if (this->id < 0)
5953                     {
5954                       /* No coding system of this category is defined.  */
5955                       detect_info.rejected |= (1 << category);
5956                     }
5957                   else if (category >= coding_category_raw_text)
5958                     continue;
5959                   else if (detect_info.checked & (1 << category))
5960                     {
5961                       if (detect_info.found & (1 << category))
5962                         break;
5963                     }
5964                   else if ((*(this->detector)) (coding, &detect_info)
5965                            && detect_info.found & (1 << category))
5966                     {
5967                       if (category == coding_category_utf_16_auto)
5968                         {
5969                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5970                             category = coding_category_utf_16_le;
5971                           else
5972                             category = coding_category_utf_16_be;
5973                         }
5974                       break;
5975                     }
5976                 }
5977             }
5978
5979           if (i < coding_category_raw_text)
5980             setup_coding_system (CODING_ID_NAME (this->id), coding);
5981           else if (null_byte_found)
5982             setup_coding_system (Qno_conversion, coding);
5983           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
5984                    == CATEGORY_MASK_ANY)
5985             setup_coding_system (Qraw_text, coding);
5986           else if (detect_info.rejected)
5987             for (i = 0; i < coding_category_raw_text; i++)
5988               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5989                 {
5990                   this = coding_categories + coding_priorities[i];
5991                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5992                   break;
5993                 }
5994         }
5995     }
5996   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5997            == coding_category_utf_8_auto)
5998     {
5999       Lisp_Object coding_systems;
6000       struct coding_detection_info detect_info;
6001
6002       coding_systems
6003         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6004       detect_info.found = detect_info.rejected = 0;
6005       coding->head_ascii = 0;
6006       if (CONSP (coding_systems)
6007           && detect_coding_utf_8 (coding, &detect_info))
6008         {
6009           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6010             setup_coding_system (XCAR (coding_systems), coding);
6011           else
6012             setup_coding_system (XCDR (coding_systems), coding);
6013         }
6014     }
6015   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6016            == coding_category_utf_16_auto)
6017     {
6018       Lisp_Object coding_systems;
6019       struct coding_detection_info detect_info;
6020
6021       coding_systems
6022         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
6023       detect_info.found = detect_info.rejected = 0;
6024       coding->head_ascii = 0;
6025       if (CONSP (coding_systems)
6026           && detect_coding_utf_16 (coding, &detect_info))
6027         {
6028           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6029             setup_coding_system (XCAR (coding_systems), coding);
6030           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6031             setup_coding_system (XCDR (coding_systems), coding);
6032         }
6033     }
6034 }
6035
6036
6037 static void
6038 decode_eol (coding)
6039      struct coding_system *coding;
6040 {
6041   Lisp_Object eol_type;
6042   unsigned char *p, *pbeg, *pend;
6043
6044   eol_type = CODING_ID_EOL_TYPE (coding->id);
6045   if (EQ (eol_type, Qunix))
6046     return;
6047
6048   if (NILP (coding->dst_object))
6049     pbeg = coding->destination;
6050   else
6051     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6052   pend = pbeg + coding->produced;
6053
6054   if (VECTORP (eol_type))
6055     {
6056       int eol_seen = EOL_SEEN_NONE;
6057
6058       for (p = pbeg; p < pend; p++)
6059         {
6060           if (*p == '\n')
6061             eol_seen |= EOL_SEEN_LF;
6062           else if (*p == '\r')
6063             {
6064               if (p + 1 < pend && *(p + 1) == '\n')
6065                 {
6066                   eol_seen |= EOL_SEEN_CRLF;
6067                   p++;
6068                 }
6069               else
6070                 eol_seen |= EOL_SEEN_CR;
6071             }
6072         }
6073       if (eol_seen != EOL_SEEN_NONE
6074           && eol_seen != EOL_SEEN_LF
6075           && eol_seen != EOL_SEEN_CRLF
6076           && eol_seen != EOL_SEEN_CR)
6077         eol_seen = EOL_SEEN_LF;
6078       if (eol_seen != EOL_SEEN_NONE)
6079         eol_type = adjust_coding_eol_type (coding, eol_seen);
6080     }
6081
6082   if (EQ (eol_type, Qmac))
6083     {
6084       for (p = pbeg; p < pend; p++)
6085         if (*p == '\r')
6086           *p = '\n';
6087     }
6088   else if (EQ (eol_type, Qdos))
6089     {
6090       int n = 0;
6091
6092       if (NILP (coding->dst_object))
6093         {
6094           /* Start deleting '\r' from the tail to minimize the memory
6095              movement.  */
6096           for (p = pend - 2; p >= pbeg; p--)
6097             if (*p == '\r')
6098               {
6099                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
6100                 n++;
6101               }
6102         }
6103       else
6104         {
6105           int pos_byte = coding->dst_pos_byte;
6106           int pos = coding->dst_pos;
6107           int pos_end = pos + coding->produced_char - 1;
6108
6109           while (pos < pos_end)
6110             {
6111               p = BYTE_POS_ADDR (pos_byte);
6112               if (*p == '\r' && p[1] == '\n')
6113                 {
6114                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6115                   n++;
6116                   pos_end--;
6117                 }
6118               pos++;
6119               if (coding->dst_multibyte)
6120                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6121               else
6122                 pos_byte++;
6123             }
6124         }
6125       coding->produced -= n;
6126       coding->produced_char -= n;
6127     }
6128 }
6129
6130
6131 /* Return a translation table (or list of them) from coding system
6132    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6133    decoding (ENCODEP is zero). */
6134
6135 static Lisp_Object
6136 get_translation_table (attrs, encodep, max_lookup)
6137      Lisp_Object attrs;
6138      int encodep, *max_lookup;
6139 {
6140   Lisp_Object standard, translation_table;
6141   Lisp_Object val;
6142
6143   if (encodep)
6144     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6145       standard = Vstandard_translation_table_for_encode;
6146   else
6147     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6148       standard = Vstandard_translation_table_for_decode;
6149   if (NILP (translation_table))
6150     translation_table = standard;
6151   else
6152     {
6153       if (SYMBOLP (translation_table))
6154         translation_table = Fget (translation_table, Qtranslation_table);
6155       else if (CONSP (translation_table))
6156         {
6157           translation_table = Fcopy_sequence (translation_table);
6158           for (val = translation_table; CONSP (val); val = XCDR (val))
6159             if (SYMBOLP (XCAR (val)))
6160               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6161         }
6162       if (CHAR_TABLE_P (standard))
6163         {
6164           if (CONSP (translation_table))
6165             translation_table = nconc2 (translation_table,
6166                                         Fcons (standard, Qnil));
6167           else
6168             translation_table = Fcons (translation_table,
6169                                        Fcons (standard, Qnil));
6170         }
6171     }
6172
6173   if (max_lookup)
6174     {
6175       *max_lookup = 1;
6176       if (CHAR_TABLE_P (translation_table)
6177           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6178         {
6179           val = XCHAR_TABLE (translation_table)->extras[1];
6180           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6181             *max_lookup = XFASTINT (val);
6182         }
6183       else if (CONSP (translation_table))
6184         {
6185           Lisp_Object tail, val;
6186
6187           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6188             if (CHAR_TABLE_P (XCAR (tail))
6189                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6190               {
6191                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6192                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6193                   *max_lookup = XFASTINT (val);
6194               }
6195         }
6196     }
6197   return translation_table;
6198 }
6199
6200 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
6201   do {                                                          \
6202     trans = Qnil;                                               \
6203     if (CHAR_TABLE_P (table))                                   \
6204       {                                                         \
6205         trans = CHAR_TABLE_REF (table, c);                      \
6206         if (CHARACTERP (trans))                                 \
6207           c = XFASTINT (trans), trans = Qnil;                   \
6208       }                                                         \
6209     else if (CONSP (table))                                     \
6210       {                                                         \
6211         Lisp_Object tail;                                       \
6212                                                                 \
6213         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
6214           if (CHAR_TABLE_P (XCAR (tail)))                       \
6215             {                                                   \
6216               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
6217               if (CHARACTERP (trans))                           \
6218                 c = XFASTINT (trans), trans = Qnil;             \
6219               else if (! NILP (trans))                          \
6220                 break;                                          \
6221             }                                                   \
6222       }                                                         \
6223   } while (0)
6224
6225
6226 static Lisp_Object
6227 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
6228      Lisp_Object val;
6229      int *buf, *buf_end;
6230      int last_block;
6231      int *from_nchars, *to_nchars;
6232 {
6233   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
6234      [TO-CHAR ...].  */
6235   if (CONSP (val))
6236     {
6237       Lisp_Object from, tail;
6238       int i, len;
6239
6240       for (tail = val; CONSP (tail); tail = XCDR (tail))
6241         {
6242           val = XCAR (tail);
6243           from = XCAR (val);
6244           len = ASIZE (from);
6245           for (i = 0; i < len; i++)
6246             {
6247               if (buf + i == buf_end)
6248                 {
6249                   if (! last_block)
6250                     return Qt;
6251                   break;
6252                 }
6253               if (XINT (AREF (from, i)) != buf[i])
6254                 break;
6255             }
6256           if (i == len)
6257             {
6258               val = XCDR (val);
6259               *from_nchars = len;
6260               break;
6261             }
6262         }
6263       if (! CONSP (tail))
6264         return Qnil;
6265     }
6266   if (VECTORP (val))
6267     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6268   else
6269     *buf = XINT (val);
6270   return val;
6271 }
6272
6273
6274 static int
6275 produce_chars (coding, translation_table, last_block)
6276      struct coding_system *coding;
6277      Lisp_Object translation_table;
6278      int last_block;
6279 {
6280   unsigned char *dst = coding->destination + coding->produced;
6281   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6282   EMACS_INT produced;
6283   EMACS_INT produced_chars = 0;
6284   int carryover = 0;
6285
6286   if (! coding->chars_at_source)
6287     {
6288       /* Source characters are in coding->charbuf.  */
6289       int *buf = coding->charbuf;
6290       int *buf_end = buf + coding->charbuf_used;
6291
6292       if (EQ (coding->src_object, coding->dst_object))
6293         {
6294           coding_set_source (coding);
6295           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6296         }
6297
6298       while (buf < buf_end)
6299         {
6300           int c = *buf, i;
6301
6302           if (c >= 0)
6303             {
6304               int from_nchars = 1, to_nchars = 1;
6305               Lisp_Object trans = Qnil;
6306
6307               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6308               if (! NILP (trans))
6309                 {
6310                   trans = get_translation (trans, buf, buf_end, last_block,
6311                                            &from_nchars, &to_nchars);
6312                   if (EQ (trans, Qt))
6313                     break;
6314                   c = *buf;
6315                 }
6316
6317               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6318                 {
6319                   dst = alloc_destination (coding,
6320                                            buf_end - buf
6321                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6322                                            dst);
6323                   if (EQ (coding->src_object, coding->dst_object))
6324                     {
6325                       coding_set_source (coding);
6326                       dst_end = ((unsigned char *) coding->source) + coding->consumed;
6327                     }
6328                   else
6329                     dst_end = coding->destination + coding->dst_bytes;
6330                 }
6331
6332               for (i = 0; i < to_nchars; i++)
6333                 {
6334                   if (i > 0)
6335                     c = XINT (AREF (trans, i));
6336                   if (coding->dst_multibyte
6337                       || ! CHAR_BYTE8_P (c))
6338                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6339                   else
6340                     *dst++ = CHAR_TO_BYTE8 (c);
6341                 }
6342               produced_chars += to_nchars;
6343               *buf++ = to_nchars;
6344               while (--from_nchars > 0)
6345                 *buf++ = 0;
6346             }
6347           else
6348             /* This is an annotation datum.  (-C) is the length.  */
6349             buf += -c;
6350         }
6351       carryover = buf_end - buf;
6352     }
6353   else
6354     {
6355       /* Source characters are at coding->source.  */
6356       const unsigned char *src = coding->source;
6357       const unsigned char *src_end = src + coding->consumed;
6358
6359       if (EQ (coding->dst_object, coding->src_object))
6360         dst_end = (unsigned char *) src;
6361       if (coding->src_multibyte != coding->dst_multibyte)
6362         {
6363           if (coding->src_multibyte)
6364             {
6365               int multibytep = 1;
6366               EMACS_INT consumed_chars = 0;
6367
6368               while (1)
6369                 {
6370                   const unsigned char *src_base = src;
6371                   int c;
6372
6373                   ONE_MORE_BYTE (c);
6374                   if (dst == dst_end)
6375                     {
6376                       if (EQ (coding->src_object, coding->dst_object))
6377                         dst_end = (unsigned char *) src;
6378                       if (dst == dst_end)
6379                         {
6380                           EMACS_INT offset = src - coding->source;
6381
6382                           dst = alloc_destination (coding, src_end - src + 1,
6383                                                    dst);
6384                           dst_end = coding->destination + coding->dst_bytes;
6385                           coding_set_source (coding);
6386                           src = coding->source + offset;
6387                           src_end = coding->source + coding->src_bytes;
6388                           if (EQ (coding->src_object, coding->dst_object))
6389                             dst_end = (unsigned char *) src;
6390                         }
6391                     }
6392                   *dst++ = c;
6393                   produced_chars++;
6394                 }
6395             no_more_source:
6396               ;
6397             }
6398           else
6399             while (src < src_end)
6400               {
6401                 int multibytep = 1;
6402                 int c = *src++;
6403
6404                 if (dst >= dst_end - 1)
6405                   {
6406                     if (EQ (coding->src_object, coding->dst_object))
6407                       dst_end = (unsigned char *) src;
6408                     if (dst >= dst_end - 1)
6409                       {
6410                         EMACS_INT offset = src - coding->source;
6411                         EMACS_INT more_bytes;
6412
6413                         if (EQ (coding->src_object, coding->dst_object))
6414                           more_bytes = ((src_end - src) / 2) + 2;
6415                         else
6416                           more_bytes = src_end - src + 2;
6417                         dst = alloc_destination (coding, more_bytes, dst);
6418                         dst_end = coding->destination + coding->dst_bytes;
6419                         coding_set_source (coding);
6420                         src = coding->source + offset;
6421                         src_end = coding->source + coding->src_bytes;
6422                         if (EQ (coding->src_object, coding->dst_object))
6423                           dst_end = (unsigned char *) src;
6424                       }
6425                   }
6426                 EMIT_ONE_BYTE (c);
6427               }
6428         }
6429       else
6430         {
6431           if (!EQ (coding->src_object, coding->dst_object))
6432             {
6433               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6434
6435               if (require > 0)
6436                 {
6437                   EMACS_INT offset = src - coding->source;
6438
6439                   dst = alloc_destination (coding, require, dst);
6440                   coding_set_source (coding);
6441                   src = coding->source + offset;
6442                   src_end = coding->source + coding->src_bytes;
6443                 }
6444             }
6445           produced_chars = coding->consumed_char;
6446           while (src < src_end)
6447             *dst++ = *src++;
6448         }
6449     }
6450
6451   produced = dst - (coding->destination + coding->produced);
6452   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6453     insert_from_gap (produced_chars, produced);
6454   coding->produced += produced;
6455   coding->produced_char += produced_chars;
6456   return carryover;
6457 }
6458
6459 /* Compose text in CODING->object according to the annotation data at
6460    CHARBUF.  CHARBUF is an array:
6461      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
6462  */
6463
6464 static INLINE void
6465 produce_composition (coding, charbuf, pos)
6466      struct coding_system *coding;
6467      int *charbuf;
6468      EMACS_INT pos;
6469 {
6470   int len;
6471   EMACS_INT to;
6472   enum composition_method method;
6473   Lisp_Object components;
6474
6475   len = -charbuf[0];
6476   to = pos + charbuf[2];
6477   if (to <= pos)
6478     return;
6479   method = (enum composition_method) (charbuf[3]);
6480
6481   if (method == COMPOSITION_RELATIVE)
6482     components = Qnil;
6483   else if (method >= COMPOSITION_WITH_RULE
6484            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6485     {
6486       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6487       int i;
6488
6489       len -= 4;
6490       charbuf += 4;
6491       for (i = 0; i < len; i++)
6492         {
6493           args[i] = make_number (charbuf[i]);
6494           if (charbuf[i] < 0)
6495             return;
6496         }
6497       components = (method == COMPOSITION_WITH_ALTCHARS
6498                     ? Fstring (len, args) : Fvector (len, args));
6499     }
6500   else
6501     return;
6502   compose_text (pos, to, components, Qnil, coding->dst_object);
6503 }
6504
6505
6506 /* Put `charset' property on text in CODING->object according to
6507    the annotation data at CHARBUF.  CHARBUF is an array:
6508      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6509  */
6510
6511 static INLINE void
6512 produce_charset (coding, charbuf, pos)
6513      struct coding_system *coding;
6514      int *charbuf;
6515      EMACS_INT pos;
6516 {
6517   EMACS_INT from = pos - charbuf[2];
6518   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6519
6520   Fput_text_property (make_number (from), make_number (pos),
6521                       Qcharset, CHARSET_NAME (charset),
6522                       coding->dst_object);
6523 }
6524
6525
6526 #define CHARBUF_SIZE 0x4000
6527
6528 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
6529   do {                                                                  \
6530     int size = CHARBUF_SIZE;;                                           \
6531                                                                         \
6532     coding->charbuf = NULL;                                             \
6533     while (size > 1024)                                                 \
6534       {                                                                 \
6535         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
6536         if (coding->charbuf)                                            \
6537           break;                                                        \
6538         size >>= 1;                                                     \
6539       }                                                                 \
6540     if (! coding->charbuf)                                              \
6541       {                                                                 \
6542         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
6543         return coding->result;                                          \
6544       }                                                                 \
6545     coding->charbuf_size = size;                                        \
6546   } while (0)
6547
6548
6549 static void
6550 produce_annotation (coding, pos)
6551      struct coding_system *coding;
6552      EMACS_INT pos;
6553 {
6554   int *charbuf = coding->charbuf;
6555   int *charbuf_end = charbuf + coding->charbuf_used;
6556
6557   if (NILP (coding->dst_object))
6558     return;
6559
6560   while (charbuf < charbuf_end)
6561     {
6562       if (*charbuf >= 0)
6563         pos += *charbuf++;
6564       else
6565         {
6566           int len = -*charbuf;
6567           switch (charbuf[1])
6568             {
6569             case CODING_ANNOTATE_COMPOSITION_MASK:
6570               produce_composition (coding, charbuf, pos);
6571               break;
6572             case CODING_ANNOTATE_CHARSET_MASK:
6573               produce_charset (coding, charbuf, pos);
6574               break;
6575             default:
6576               abort ();
6577             }
6578           charbuf += len;
6579         }
6580     }
6581 }
6582
6583 /* Decode the data at CODING->src_object into CODING->dst_object.
6584    CODING->src_object is a buffer, a string, or nil.
6585    CODING->dst_object is a buffer.
6586
6587    If CODING->src_object is a buffer, it must be the current buffer.
6588    In this case, if CODING->src_pos is positive, it is a position of
6589    the source text in the buffer, otherwise, the source text is in the
6590    gap area of the buffer, and CODING->src_pos specifies the offset of
6591    the text from GPT (which must be the same as PT).  If this is the
6592    same buffer as CODING->dst_object, CODING->src_pos must be
6593    negative.
6594
6595    If CODING->src_object is a string, CODING->src_pos is an index to
6596    that string.
6597
6598    If CODING->src_object is nil, CODING->source must already point to
6599    the non-relocatable memory area.  In this case, CODING->src_pos is
6600    an offset from CODING->source.
6601
6602    The decoded data is inserted at the current point of the buffer
6603    CODING->dst_object.
6604 */
6605
6606 static int
6607 decode_coding (coding)
6608      struct coding_system *coding;
6609 {
6610   Lisp_Object attrs;
6611   Lisp_Object undo_list;
6612   Lisp_Object translation_table;
6613   int carryover;
6614   int i;
6615
6616   if (BUFFERP (coding->src_object)
6617       && coding->src_pos > 0
6618       && coding->src_pos < GPT
6619       && coding->src_pos + coding->src_chars > GPT)
6620     move_gap_both (coding->src_pos, coding->src_pos_byte);
6621
6622   undo_list = Qt;
6623   if (BUFFERP (coding->dst_object))
6624     {
6625       if (current_buffer != XBUFFER (coding->dst_object))
6626         set_buffer_internal (XBUFFER (coding->dst_object));
6627       if (GPT != PT)
6628         move_gap_both (PT, PT_BYTE);
6629       undo_list = current_buffer->undo_list;
6630       current_buffer->undo_list = Qt;
6631     }
6632
6633   coding->consumed = coding->consumed_char = 0;
6634   coding->produced = coding->produced_char = 0;
6635   coding->chars_at_source = 0;
6636   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6637   coding->errors = 0;
6638
6639   ALLOC_CONVERSION_WORK_AREA (coding);
6640
6641   attrs = CODING_ID_ATTRS (coding->id);
6642   translation_table = get_translation_table (attrs, 0, NULL);
6643
6644   carryover = 0;
6645   do
6646     {
6647       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6648
6649       coding_set_source (coding);
6650       coding->annotated = 0;
6651       coding->charbuf_used = carryover;
6652       (*(coding->decoder)) (coding);
6653       coding_set_destination (coding);
6654       carryover = produce_chars (coding, translation_table, 0);
6655       if (coding->annotated)
6656         produce_annotation (coding, pos);
6657       for (i = 0; i < carryover; i++)
6658         coding->charbuf[i]
6659           = coding->charbuf[coding->charbuf_used - carryover + i];
6660     }
6661   while (coding->consumed < coding->src_bytes
6662          && (coding->result == CODING_RESULT_SUCCESS
6663              || coding->result == CODING_RESULT_INVALID_SRC));
6664
6665   if (carryover > 0)
6666     {
6667       coding_set_destination (coding);
6668       coding->charbuf_used = carryover;
6669       produce_chars (coding, translation_table, 1);
6670     }
6671
6672   coding->carryover_bytes = 0;
6673   if (coding->consumed < coding->src_bytes)
6674     {
6675       int nbytes = coding->src_bytes - coding->consumed;
6676       const unsigned char *src;
6677
6678       coding_set_source (coding);
6679       coding_set_destination (coding);
6680       src = coding->source + coding->consumed;
6681
6682       if (coding->mode & CODING_MODE_LAST_BLOCK)
6683         {
6684           /* Flush out unprocessed data as binary chars.  We are sure
6685              that the number of data is less than the size of
6686              coding->charbuf.  */
6687           coding->charbuf_used = 0;
6688           coding->chars_at_source = 0;
6689
6690           while (nbytes-- > 0)
6691             {
6692               int c = *src++;
6693
6694               if (c & 0x80)
6695                 c = BYTE8_TO_CHAR (c);
6696               coding->charbuf[coding->charbuf_used++] = c;
6697             }
6698           produce_chars (coding, Qnil, 1);
6699         }
6700       else
6701         {
6702           /* Record unprocessed bytes in coding->carryover.  We are
6703              sure that the number of data is less than the size of
6704              coding->carryover.  */
6705           unsigned char *p = coding->carryover;
6706
6707           coding->carryover_bytes = nbytes;
6708           while (nbytes-- > 0)
6709             *p++ = *src++;
6710         }
6711       coding->consumed = coding->src_bytes;
6712     }
6713
6714   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6715     decode_eol (coding);
6716   if (BUFFERP (coding->dst_object))
6717     {
6718       current_buffer->undo_list = undo_list;
6719       record_insert (coding->dst_pos, coding->produced_char);
6720     }
6721   return coding->result;
6722 }
6723
6724
6725 /* Extract an annotation datum from a composition starting at POS and
6726    ending before LIMIT of CODING->src_object (buffer or string), store
6727    the data in BUF, set *STOP to a starting position of the next
6728    composition (if any) or to LIMIT, and return the address of the
6729    next element of BUF.
6730
6731    If such an annotation is not found, set *STOP to a starting
6732    position of a composition after POS (if any) or to LIMIT, and
6733    return BUF.  */
6734
6735 static INLINE int *
6736 handle_composition_annotation (pos, limit, coding, buf, stop)
6737      EMACS_INT pos, limit;
6738      struct coding_system *coding;
6739      int *buf;
6740      EMACS_INT *stop;
6741 {
6742   EMACS_INT start, end;
6743   Lisp_Object prop;
6744
6745   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6746       || end > limit)
6747     *stop = limit;
6748   else if (start > pos)
6749     *stop = start;
6750   else
6751     {
6752       if (start == pos)
6753         {
6754           /* We found a composition.  Store the corresponding
6755              annotation data in BUF.  */
6756           int *head = buf;
6757           enum composition_method method = COMPOSITION_METHOD (prop);
6758           int nchars = COMPOSITION_LENGTH (prop);
6759
6760           ADD_COMPOSITION_DATA (buf, nchars, method);
6761           if (method != COMPOSITION_RELATIVE)
6762             {
6763               Lisp_Object components;
6764               int len, i, i_byte;
6765
6766               components = COMPOSITION_COMPONENTS (prop);
6767               if (VECTORP (components))
6768                 {
6769                   len = XVECTOR (components)->size;
6770                   for (i = 0; i < len; i++)
6771                     *buf++ = XINT (AREF (components, i));
6772                 }
6773               else if (STRINGP (components))
6774                 {
6775                   len = SCHARS (components);
6776                   i = i_byte = 0;
6777                   while (i < len)
6778                     {
6779                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6780                       buf++;
6781                     }
6782                 }
6783               else if (INTEGERP (components))
6784                 {
6785                   len = 1;
6786                   *buf++ = XINT (components);
6787                 }
6788               else if (CONSP (components))
6789                 {
6790                   for (len = 0; CONSP (components);
6791                        len++, components = XCDR (components))
6792                     *buf++ = XINT (XCAR (components));
6793                 }
6794               else
6795                 abort ();
6796               *head -= len;
6797             }
6798         }
6799
6800       if (find_composition (end, limit, &start, &end, &prop,
6801                             coding->src_object)
6802           && end <= limit)
6803         *stop = start;
6804       else
6805         *stop = limit;
6806     }
6807   return buf;
6808 }
6809
6810
6811 /* Extract an annotation datum from a text property `charset' at POS of
6812    CODING->src_object (buffer of string), store the data in BUF, set
6813    *STOP to the position where the value of `charset' property changes
6814    (limiting by LIMIT), and return the address of the next element of
6815    BUF.
6816
6817    If the property value is nil, set *STOP to the position where the
6818    property value is non-nil (limiting by LIMIT), and return BUF.  */
6819
6820 static INLINE int *
6821 handle_charset_annotation (pos, limit, coding, buf, stop)
6822      EMACS_INT pos, limit;
6823      struct coding_system *coding;
6824      int *buf;
6825      EMACS_INT *stop;
6826 {
6827   Lisp_Object val, next;
6828   int id;
6829
6830   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6831   if (! NILP (val) && CHARSETP (val))
6832     id = XINT (CHARSET_SYMBOL_ID (val));
6833   else
6834     id = -1;
6835   ADD_CHARSET_DATA (buf, 0, id);
6836   next = Fnext_single_property_change (make_number (pos), Qcharset,
6837                                        coding->src_object,
6838                                        make_number (limit));
6839   *stop = XINT (next);
6840   return buf;
6841 }
6842
6843
6844 static void
6845 consume_chars (coding, translation_table, max_lookup)
6846      struct coding_system *coding;
6847      Lisp_Object translation_table;
6848      int max_lookup;
6849 {
6850   int *buf = coding->charbuf;
6851   int *buf_end = coding->charbuf + coding->charbuf_size;
6852   const unsigned char *src = coding->source + coding->consumed;
6853   const unsigned char *src_end = coding->source + coding->src_bytes;
6854   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6855   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6856   int multibytep = coding->src_multibyte;
6857   Lisp_Object eol_type;
6858   int c;
6859   EMACS_INT stop, stop_composition, stop_charset;
6860   int *lookup_buf = NULL;
6861
6862   if (! NILP (translation_table))
6863     lookup_buf = alloca (sizeof (int) * max_lookup);
6864
6865   eol_type = CODING_ID_EOL_TYPE (coding->id);
6866   if (VECTORP (eol_type))
6867     eol_type = Qunix;
6868
6869   /* Note: composition handling is not yet implemented.  */
6870   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6871
6872   if (NILP (coding->src_object))
6873     stop = stop_composition = stop_charset = end_pos;
6874   else
6875     {
6876       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6877         stop = stop_composition = pos;
6878       else
6879         stop = stop_composition = end_pos;
6880       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6881         stop = stop_charset = pos;
6882       else
6883         stop_charset = end_pos;
6884     }
6885
6886   /* Compensate for CRLF and conversion.  */
6887   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6888   while (buf < buf_end)
6889     {
6890       Lisp_Object trans;
6891
6892       if (pos == stop)
6893         {
6894           if (pos == end_pos)
6895             break;
6896           if (pos == stop_composition)
6897             buf = handle_composition_annotation (pos, end_pos, coding,
6898                                                  buf, &stop_composition);
6899           if (pos == stop_charset)
6900             buf = handle_charset_annotation (pos, end_pos, coding,
6901                                              buf, &stop_charset);
6902           stop = (stop_composition < stop_charset
6903                   ? stop_composition : stop_charset);
6904         }
6905
6906       if (! multibytep)
6907         {
6908           EMACS_INT bytes;
6909
6910           if (coding->encoder == encode_coding_raw_text)
6911             c = *src++, pos++;
6912           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6913             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
6914           else
6915             c = BYTE8_TO_CHAR (*src), src++, pos++;
6916         }
6917       else
6918         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
6919       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6920         c = '\n';
6921       if (! EQ (eol_type, Qunix))
6922         {
6923           if (c == '\n')
6924             {
6925               if (EQ (eol_type, Qdos))
6926                 *buf++ = '\r';
6927               else
6928                 c = '\r';
6929             }
6930         }
6931
6932       trans = Qnil;
6933       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6934       if (NILP (trans))
6935         *buf++ = c;
6936       else
6937         {
6938           int from_nchars = 1, to_nchars = 1;
6939           int *lookup_buf_end;
6940           const unsigned char *p = src;
6941           int i;
6942
6943           lookup_buf[0] = c;
6944           for (i = 1; i < max_lookup && p < src_end; i++)
6945             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6946           lookup_buf_end = lookup_buf + i;
6947           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6948                                    &from_nchars, &to_nchars);
6949           if (EQ (trans, Qt)
6950               || buf + to_nchars > buf_end)
6951             break;
6952           *buf++ = *lookup_buf;
6953           for (i = 1; i < to_nchars; i++)
6954             *buf++ = XINT (AREF (trans, i));
6955           for (i = 1; i < from_nchars; i++, pos++)
6956             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6957         }
6958     }
6959
6960   coding->consumed = src - coding->source;
6961   coding->consumed_char = pos - coding->src_pos;
6962   coding->charbuf_used = buf - coding->charbuf;
6963   coding->chars_at_source = 0;
6964 }
6965
6966
6967 /* Encode the text at CODING->src_object into CODING->dst_object.
6968    CODING->src_object is a buffer or a string.
6969    CODING->dst_object is a buffer or nil.
6970
6971    If CODING->src_object is a buffer, it must be the current buffer.
6972    In this case, if CODING->src_pos is positive, it is a position of
6973    the source text in the buffer, otherwise. the source text is in the
6974    gap area of the buffer, and coding->src_pos specifies the offset of
6975    the text from GPT (which must be the same as PT).  If this is the
6976    same buffer as CODING->dst_object, CODING->src_pos must be
6977    negative and CODING should not have `pre-write-conversion'.
6978
6979    If CODING->src_object is a string, CODING should not have
6980    `pre-write-conversion'.
6981
6982    If CODING->dst_object is a buffer, the encoded data is inserted at
6983    the current point of that buffer.
6984
6985    If CODING->dst_object is nil, the encoded data is placed at the
6986    memory area specified by CODING->destination.  */
6987
6988 static int
6989 encode_coding (coding)
6990      struct coding_system *coding;
6991 {
6992   Lisp_Object attrs;
6993   Lisp_Object translation_table;
6994   int max_lookup;
6995
6996   attrs = CODING_ID_ATTRS (coding->id);
6997   if (coding->encoder == encode_coding_raw_text)
6998     translation_table = Qnil, max_lookup = 0;
6999   else
7000     translation_table = get_translation_table (attrs, 1, &max_lookup);
7001
7002   if (BUFFERP (coding->dst_object))
7003     {
7004       set_buffer_internal (XBUFFER (coding->dst_object));
7005       coding->dst_multibyte
7006         = ! NILP (current_buffer->enable_multibyte_characters);
7007     }
7008
7009   coding->consumed = coding->consumed_char = 0;
7010   coding->produced = coding->produced_char = 0;
7011   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7012   coding->errors = 0;
7013
7014   ALLOC_CONVERSION_WORK_AREA (coding);
7015
7016   do {
7017     coding_set_source (coding);
7018     consume_chars (coding, translation_table, max_lookup);
7019     coding_set_destination (coding);
7020     (*(coding->encoder)) (coding);
7021   } while (coding->consumed_char < coding->src_chars);
7022
7023   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7024     insert_from_gap (coding->produced_char, coding->produced);
7025
7026   return (coding->result);
7027 }
7028
7029
7030 /* Name (or base name) of work buffer for code conversion.  */
7031 static Lisp_Object Vcode_conversion_workbuf_name;
7032
7033 /* A working buffer used by the top level conversion.  Once it is
7034    created, it is never destroyed.  It has the name
7035    Vcode_conversion_workbuf_name.  The other working buffers are
7036    destroyed after the use is finished, and their names are modified
7037    versions of Vcode_conversion_workbuf_name.  */
7038 static Lisp_Object Vcode_conversion_reused_workbuf;
7039
7040 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
7041 static int reused_workbuf_in_use;
7042
7043
7044 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7045    multibyteness of returning buffer.  */
7046
7047 static Lisp_Object
7048 make_conversion_work_buffer (multibyte)
7049      int multibyte;
7050 {
7051   Lisp_Object name, workbuf;
7052   struct buffer *current;
7053
7054   if (reused_workbuf_in_use++)
7055     {
7056       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7057       workbuf = Fget_buffer_create (name);
7058     }
7059   else
7060     {
7061       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7062         Vcode_conversion_reused_workbuf
7063           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7064       workbuf = Vcode_conversion_reused_workbuf;
7065     }
7066   current = current_buffer;
7067   set_buffer_internal (XBUFFER (workbuf));
7068   /* We can't allow modification hooks to run in the work buffer.  For
7069      instance, directory_files_internal assumes that file decoding
7070      doesn't compile new regexps.  */
7071   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7072   Ferase_buffer ();
7073   current_buffer->undo_list = Qt;
7074   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7075   set_buffer_internal (current);
7076   return workbuf;
7077 }
7078
7079
7080 static Lisp_Object
7081 code_conversion_restore (arg)
7082      Lisp_Object arg;
7083 {
7084   Lisp_Object current, workbuf;
7085   struct gcpro gcpro1;
7086
7087   GCPRO1 (arg);
7088   current = XCAR (arg);
7089   workbuf = XCDR (arg);
7090   if (! NILP (workbuf))
7091     {
7092       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7093         reused_workbuf_in_use = 0;
7094       else if (! NILP (Fbuffer_live_p (workbuf)))
7095         Fkill_buffer (workbuf);
7096     }
7097   set_buffer_internal (XBUFFER (current));
7098   UNGCPRO;
7099   return Qnil;
7100 }
7101
7102 Lisp_Object
7103 code_conversion_save (with_work_buf, multibyte)
7104      int with_work_buf, multibyte;
7105 {
7106   Lisp_Object workbuf = Qnil;
7107
7108   if (with_work_buf)
7109     workbuf = make_conversion_work_buffer (multibyte);
7110   record_unwind_protect (code_conversion_restore,
7111                          Fcons (Fcurrent_buffer (), workbuf));
7112   return workbuf;
7113 }
7114
7115 int
7116 decode_coding_gap (coding, chars, bytes)
7117      struct coding_system *coding;
7118      EMACS_INT chars, bytes;
7119 {
7120   int count = specpdl_ptr - specpdl;
7121   Lisp_Object attrs;
7122
7123   code_conversion_save (0, 0);
7124
7125   coding->src_object = Fcurrent_buffer ();
7126   coding->src_chars = chars;
7127   coding->src_bytes = bytes;
7128   coding->src_pos = -chars;
7129   coding->src_pos_byte = -bytes;
7130   coding->src_multibyte = chars < bytes;
7131   coding->dst_object = coding->src_object;
7132   coding->dst_pos = PT;
7133   coding->dst_pos_byte = PT_BYTE;
7134   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7135
7136   if (CODING_REQUIRE_DETECTION (coding))
7137     detect_coding (coding);
7138
7139   coding->mode |= CODING_MODE_LAST_BLOCK;
7140   current_buffer->text->inhibit_shrinking = 1;
7141   decode_coding (coding);
7142   current_buffer->text->inhibit_shrinking = 0;
7143
7144   attrs = CODING_ID_ATTRS (coding->id);
7145   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7146     {
7147       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7148       Lisp_Object val;
7149
7150       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7151       val = call1 (CODING_ATTR_POST_READ (attrs),
7152                    make_number (coding->produced_char));
7153       CHECK_NATNUM (val);
7154       coding->produced_char += Z - prev_Z;
7155       coding->produced += Z_BYTE - prev_Z_BYTE;
7156     }
7157
7158   unbind_to (count, Qnil);
7159   return coding->result;
7160 }
7161
7162 int
7163 encode_coding_gap (coding, chars, bytes)
7164      struct coding_system *coding;
7165      EMACS_INT chars, bytes;
7166 {
7167   int count = specpdl_ptr - specpdl;
7168
7169   code_conversion_save (0, 0);
7170
7171   coding->src_object = Fcurrent_buffer ();
7172   coding->src_chars = chars;
7173   coding->src_bytes = bytes;
7174   coding->src_pos = -chars;
7175   coding->src_pos_byte = -bytes;
7176   coding->src_multibyte = chars < bytes;
7177   coding->dst_object = coding->src_object;
7178   coding->dst_pos = PT;
7179   coding->dst_pos_byte = PT_BYTE;
7180
7181   encode_coding (coding);
7182
7183   unbind_to (count, Qnil);
7184   return coding->result;
7185 }
7186
7187
7188 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7189    SRC_OBJECT into DST_OBJECT by coding context CODING.
7190
7191    SRC_OBJECT is a buffer, a string, or Qnil.
7192
7193    If it is a buffer, the text is at point of the buffer.  FROM and TO
7194    are positions in the buffer.
7195
7196    If it is a string, the text is at the beginning of the string.
7197    FROM and TO are indices to the string.
7198
7199    If it is nil, the text is at coding->source.  FROM and TO are
7200    indices to coding->source.
7201
7202    DST_OBJECT is a buffer, Qt, or Qnil.
7203
7204    If it is a buffer, the decoded text is inserted at point of the
7205    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7206    is deleted.
7207
7208    If it is Qt, a string is made from the decoded text, and
7209    set in CODING->dst_object.
7210
7211    If it is Qnil, the decoded text is stored at CODING->destination.
7212    The caller must allocate CODING->dst_bytes bytes at
7213    CODING->destination by xmalloc.  If the decoded text is longer than
7214    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7215  */
7216
7217 void
7218 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7219                       dst_object)
7220      struct coding_system *coding;
7221      Lisp_Object src_object;
7222      EMACS_INT from, from_byte, to, to_byte;
7223      Lisp_Object dst_object;
7224 {
7225   int count = specpdl_ptr - specpdl;
7226   unsigned char *destination;
7227   EMACS_INT dst_bytes;
7228   EMACS_INT chars = to - from;
7229   EMACS_INT bytes = to_byte - from_byte;
7230   Lisp_Object attrs;
7231   int saved_pt = -1, saved_pt_byte;
7232   int need_marker_adjustment = 0;
7233   Lisp_Object old_deactivate_mark;
7234
7235   old_deactivate_mark = Vdeactivate_mark;
7236
7237   if (NILP (dst_object))
7238     {
7239       destination = coding->destination;
7240       dst_bytes = coding->dst_bytes;
7241     }
7242
7243   coding->src_object = src_object;
7244   coding->src_chars = chars;
7245   coding->src_bytes = bytes;
7246   coding->src_multibyte = chars < bytes;
7247
7248   if (STRINGP (src_object))
7249     {
7250       coding->src_pos = from;
7251       coding->src_pos_byte = from_byte;
7252     }
7253   else if (BUFFERP (src_object))
7254     {
7255       set_buffer_internal (XBUFFER (src_object));
7256       if (from != GPT)
7257         move_gap_both (from, from_byte);
7258       if (EQ (src_object, dst_object))
7259         {
7260           struct Lisp_Marker *tail;
7261
7262           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7263             {
7264               tail->need_adjustment
7265                 = tail->charpos == (tail->insertion_type ? from : to);
7266               need_marker_adjustment |= tail->need_adjustment;
7267             }
7268           saved_pt = PT, saved_pt_byte = PT_BYTE;
7269           TEMP_SET_PT_BOTH (from, from_byte);
7270           current_buffer->text->inhibit_shrinking = 1;
7271           del_range_both (from, from_byte, to, to_byte, 1);
7272           coding->src_pos = -chars;
7273           coding->src_pos_byte = -bytes;
7274         }
7275       else
7276         {
7277           coding->src_pos = from;
7278           coding->src_pos_byte = from_byte;
7279         }
7280     }
7281
7282   if (CODING_REQUIRE_DETECTION (coding))
7283     detect_coding (coding);
7284   attrs = CODING_ID_ATTRS (coding->id);
7285
7286   if (EQ (dst_object, Qt)
7287       || (! NILP (CODING_ATTR_POST_READ (attrs))
7288           && NILP (dst_object)))
7289     {
7290       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7291       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7292       coding->dst_pos = BEG;
7293       coding->dst_pos_byte = BEG_BYTE;
7294     }
7295   else if (BUFFERP (dst_object))
7296     {
7297       code_conversion_save (0, 0);
7298       coding->dst_object = dst_object;
7299       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7300       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7301       coding->dst_multibyte
7302         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7303     }
7304   else
7305     {
7306       code_conversion_save (0, 0);
7307       coding->dst_object = Qnil;
7308       /* Most callers presume this will return a multibyte result, and they
7309          won't use `binary' or `raw-text' anyway, so let's not worry about
7310          CODING_FOR_UNIBYTE.  */
7311       coding->dst_multibyte = 1;
7312     }
7313
7314   decode_coding (coding);
7315
7316   if (BUFFERP (coding->dst_object))
7317     set_buffer_internal (XBUFFER (coding->dst_object));
7318
7319   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7320     {
7321       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7322       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7323       Lisp_Object val;
7324
7325       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7326       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7327               old_deactivate_mark);
7328       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7329                         make_number (coding->produced_char));
7330       UNGCPRO;
7331       CHECK_NATNUM (val);
7332       coding->produced_char += Z - prev_Z;
7333       coding->produced += Z_BYTE - prev_Z_BYTE;
7334     }
7335
7336   if (EQ (dst_object, Qt))
7337     {
7338       coding->dst_object = Fbuffer_string ();
7339     }
7340   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7341     {
7342       set_buffer_internal (XBUFFER (coding->dst_object));
7343       if (dst_bytes < coding->produced)
7344         {
7345           destination = xrealloc (destination, coding->produced);
7346           if (! destination)
7347             {
7348               record_conversion_result (coding,
7349                                         CODING_RESULT_INSUFFICIENT_DST);
7350               unbind_to (count, Qnil);
7351               return;
7352             }
7353           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7354             move_gap_both (BEGV, BEGV_BYTE);
7355           bcopy (BEGV_ADDR, destination, coding->produced);
7356           coding->destination = destination;
7357         }
7358     }
7359
7360   if (saved_pt >= 0)
7361     {
7362       /* This is the case of:
7363          (BUFFERP (src_object) && EQ (src_object, dst_object))
7364          As we have moved PT while replacing the original buffer
7365          contents, we must recover it now.  */
7366       set_buffer_internal (XBUFFER (src_object));
7367       current_buffer->text->inhibit_shrinking = 0;
7368       if (saved_pt < from)
7369         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7370       else if (saved_pt < from + chars)
7371         TEMP_SET_PT_BOTH (from, from_byte);
7372       else if (! NILP (current_buffer->enable_multibyte_characters))
7373         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7374                           saved_pt_byte + (coding->produced - bytes));
7375       else
7376         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7377                           saved_pt_byte + (coding->produced - bytes));
7378
7379       if (need_marker_adjustment)
7380         {
7381           struct Lisp_Marker *tail;
7382
7383           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7384             if (tail->need_adjustment)
7385               {
7386                 tail->need_adjustment = 0;
7387                 if (tail->insertion_type)
7388                   {
7389                     tail->bytepos = from_byte;
7390                     tail->charpos = from;
7391                   }
7392                 else
7393                   {
7394                     tail->bytepos = from_byte + coding->produced;
7395                     tail->charpos
7396                       = (NILP (current_buffer->enable_multibyte_characters)
7397                          ? tail->bytepos : from + coding->produced_char);
7398                   }
7399               }
7400         }
7401     }
7402
7403   Vdeactivate_mark = old_deactivate_mark;
7404   unbind_to (count, coding->dst_object);
7405 }
7406
7407
7408 void
7409 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7410                       dst_object)
7411      struct coding_system *coding;
7412      Lisp_Object src_object;
7413      EMACS_INT from, from_byte, to, to_byte;
7414      Lisp_Object dst_object;
7415 {
7416   int count = specpdl_ptr - specpdl;
7417   EMACS_INT chars = to - from;
7418   EMACS_INT bytes = to_byte - from_byte;
7419   Lisp_Object attrs;
7420   int saved_pt = -1, saved_pt_byte;
7421   int need_marker_adjustment = 0;
7422   int kill_src_buffer = 0;
7423   Lisp_Object old_deactivate_mark;
7424
7425   old_deactivate_mark = Vdeactivate_mark;
7426
7427   coding->src_object = src_object;
7428   coding->src_chars = chars;
7429   coding->src_bytes = bytes;
7430   coding->src_multibyte = chars < bytes;
7431
7432   attrs = CODING_ID_ATTRS (coding->id);
7433
7434   if (EQ (src_object, dst_object))
7435     {
7436       struct Lisp_Marker *tail;
7437
7438       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7439         {
7440           tail->need_adjustment
7441             = tail->charpos == (tail->insertion_type ? from : to);
7442           need_marker_adjustment |= tail->need_adjustment;
7443         }
7444     }
7445
7446   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7447     {
7448       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7449       set_buffer_internal (XBUFFER (coding->src_object));
7450       if (STRINGP (src_object))
7451         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7452       else if (BUFFERP (src_object))
7453         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7454       else
7455         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7456
7457       if (EQ (src_object, dst_object))
7458         {
7459           set_buffer_internal (XBUFFER (src_object));
7460           saved_pt = PT, saved_pt_byte = PT_BYTE;
7461           del_range_both (from, from_byte, to, to_byte, 1);
7462           set_buffer_internal (XBUFFER (coding->src_object));
7463         }
7464
7465       {
7466         Lisp_Object args[3];
7467         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7468
7469         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7470                 old_deactivate_mark);
7471         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7472         args[1] = make_number (BEG);
7473         args[2] = make_number (Z);
7474         safe_call (3, args);
7475         UNGCPRO;
7476       }
7477       if (XBUFFER (coding->src_object) != current_buffer)
7478         kill_src_buffer = 1;
7479       coding->src_object = Fcurrent_buffer ();
7480       if (BEG != GPT)
7481         move_gap_both (BEG, BEG_BYTE);
7482       coding->src_chars = Z - BEG;
7483       coding->src_bytes = Z_BYTE - BEG_BYTE;
7484       coding->src_pos = BEG;
7485       coding->src_pos_byte = BEG_BYTE;
7486       coding->src_multibyte = Z < Z_BYTE;
7487     }
7488   else if (STRINGP (src_object))
7489     {
7490       code_conversion_save (0, 0);
7491       coding->src_pos = from;
7492       coding->src_pos_byte = from_byte;
7493     }
7494   else if (BUFFERP (src_object))
7495     {
7496       code_conversion_save (0, 0);
7497       set_buffer_internal (XBUFFER (src_object));
7498       if (EQ (src_object, dst_object))
7499         {
7500           saved_pt = PT, saved_pt_byte = PT_BYTE;
7501           coding->src_object = del_range_1 (from, to, 1, 1);
7502           coding->src_pos = 0;
7503           coding->src_pos_byte = 0;
7504         }
7505       else
7506         {
7507           if (from < GPT && to >= GPT)
7508             move_gap_both (from, from_byte);
7509           coding->src_pos = from;
7510           coding->src_pos_byte = from_byte;
7511         }
7512     }
7513   else
7514     code_conversion_save (0, 0);
7515
7516   if (BUFFERP (dst_object))
7517     {
7518       coding->dst_object = dst_object;
7519       if (EQ (src_object, dst_object))
7520         {
7521           coding->dst_pos = from;
7522           coding->dst_pos_byte = from_byte;
7523         }
7524       else
7525         {
7526           struct buffer *current = current_buffer;
7527
7528           set_buffer_temp (XBUFFER (dst_object));
7529           coding->dst_pos = PT;
7530           coding->dst_pos_byte = PT_BYTE;
7531           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7532           set_buffer_temp (current);
7533         }
7534       coding->dst_multibyte
7535         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7536     }
7537   else if (EQ (dst_object, Qt))
7538     {
7539       coding->dst_object = Qnil;
7540       coding->dst_bytes = coding->src_chars;
7541       if (coding->dst_bytes == 0)
7542         coding->dst_bytes = 1;
7543       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7544       coding->dst_multibyte = 0;
7545     }
7546   else
7547     {
7548       coding->dst_object = Qnil;
7549       coding->dst_multibyte = 0;
7550     }
7551
7552   encode_coding (coding);
7553
7554   if (EQ (dst_object, Qt))
7555     {
7556       if (BUFFERP (coding->dst_object))
7557         coding->dst_object = Fbuffer_string ();
7558       else
7559         {
7560           coding->dst_object
7561             = make_unibyte_string ((char *) coding->destination,
7562                                    coding->produced);
7563           xfree (coding->destination);
7564         }
7565     }
7566
7567   if (saved_pt >= 0)
7568     {
7569       /* This is the case of:
7570          (BUFFERP (src_object) && EQ (src_object, dst_object))
7571          As we have moved PT while replacing the original buffer
7572          contents, we must recover it now.  */
7573       set_buffer_internal (XBUFFER (src_object));
7574       if (saved_pt < from)
7575         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7576       else if (saved_pt < from + chars)
7577         TEMP_SET_PT_BOTH (from, from_byte);
7578       else if (! NILP (current_buffer->enable_multibyte_characters))
7579         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7580                           saved_pt_byte + (coding->produced - bytes));
7581       else
7582         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7583                           saved_pt_byte + (coding->produced - bytes));
7584
7585       if (need_marker_adjustment)
7586         {
7587           struct Lisp_Marker *tail;
7588
7589           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7590             if (tail->need_adjustment)
7591               {
7592                 tail->need_adjustment = 0;
7593                 if (tail->insertion_type)
7594                   {
7595                     tail->bytepos = from_byte;
7596                     tail->charpos = from;
7597                   }
7598                 else
7599                   {
7600                     tail->bytepos = from_byte + coding->produced;
7601                     tail->charpos
7602                       = (NILP (current_buffer->enable_multibyte_characters)
7603                          ? tail->bytepos : from + coding->produced_char);
7604                   }
7605               }
7606         }
7607     }
7608
7609   if (kill_src_buffer)
7610     Fkill_buffer (coding->src_object);
7611
7612   Vdeactivate_mark = old_deactivate_mark;
7613   unbind_to (count, Qnil);
7614 }
7615
7616
7617 Lisp_Object
7618 preferred_coding_system ()
7619 {
7620   int id = coding_categories[coding_priorities[0]].id;
7621
7622   return CODING_ID_NAME (id);
7623 }
7624
7625 \f
7626 #ifdef emacs
7627 /*** 8. Emacs Lisp library functions ***/
7628
7629 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
7630        doc: /* Return t if OBJECT is nil or a coding-system.
7631 See the documentation of `define-coding-system' for information
7632 about coding-system objects.  */)
7633      (object)
7634      Lisp_Object object;
7635 {
7636   if (NILP (object)
7637       || CODING_SYSTEM_ID (object) >= 0)
7638     return Qt;
7639   if (! SYMBOLP (object)
7640       || NILP (Fget (object, Qcoding_system_define_form)))
7641     return Qnil;
7642   return Qt;
7643 }
7644
7645 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7646        Sread_non_nil_coding_system, 1, 1, 0,
7647        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7648      (prompt)
7649      Lisp_Object prompt;
7650 {
7651   Lisp_Object val;
7652   do
7653     {
7654       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7655                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
7656     }
7657   while (SCHARS (val) == 0);
7658   return (Fintern (val, Qnil));
7659 }
7660
7661 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7662        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7663 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7664 Ignores case when completing coding systems (all Emacs coding systems
7665 are lower-case).  */)
7666      (prompt, default_coding_system)
7667      Lisp_Object prompt, default_coding_system;
7668 {
7669   Lisp_Object val;
7670   int count = SPECPDL_INDEX ();
7671
7672   if (SYMBOLP (default_coding_system))
7673     default_coding_system = SYMBOL_NAME (default_coding_system);
7674   specbind (Qcompletion_ignore_case, Qt);
7675   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7676                           Qt, Qnil, Qcoding_system_history,
7677                           default_coding_system, Qnil);
7678   unbind_to (count, Qnil);
7679   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
7680 }
7681
7682 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7683        1, 1, 0,
7684        doc: /* Check validity of CODING-SYSTEM.
7685 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7686 It is valid if it is nil or a symbol defined as a coding system by the
7687 function `define-coding-system'.  */)
7688   (coding_system)
7689      Lisp_Object coding_system;
7690 {
7691   Lisp_Object define_form;
7692
7693   define_form = Fget (coding_system, Qcoding_system_define_form);
7694   if (! NILP (define_form))
7695     {
7696       Fput (coding_system, Qcoding_system_define_form, Qnil);
7697       safe_eval (define_form);
7698     }
7699   if (!NILP (Fcoding_system_p (coding_system)))
7700     return coding_system;
7701   xsignal1 (Qcoding_system_error, coding_system);
7702 }
7703
7704 \f
7705 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7706    HIGHEST is nonzero, return the coding system of the highest
7707    priority among the detected coding systems.  Otherwize return a
7708    list of detected coding systems sorted by their priorities.  If
7709    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7710    multibyte form but contains only ASCII and eight-bit chars.
7711    Otherwise, the bytes are raw bytes.
7712
7713    CODING-SYSTEM controls the detection as below:
7714
7715    If it is nil, detect both text-format and eol-format.  If the
7716    text-format part of CODING-SYSTEM is already specified
7717    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7718    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7719    detect only text-format.  */
7720
7721 Lisp_Object
7722 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7723                       coding_system)
7724      const unsigned char *src;
7725      EMACS_INT src_chars, src_bytes;
7726      int highest;
7727      int multibytep;
7728      Lisp_Object coding_system;
7729 {
7730   const unsigned char *src_end = src + src_bytes;
7731   Lisp_Object attrs, eol_type;
7732   Lisp_Object val = Qnil;
7733   struct coding_system coding;
7734   int id;
7735   struct coding_detection_info detect_info;
7736   enum coding_category base_category;
7737   int null_byte_found = 0, eight_bit_found = 0;
7738
7739   if (NILP (coding_system))
7740     coding_system = Qundecided;
7741   setup_coding_system (coding_system, &coding);
7742   attrs = CODING_ID_ATTRS (coding.id);
7743   eol_type = CODING_ID_EOL_TYPE (coding.id);
7744   coding_system = CODING_ATTR_BASE_NAME (attrs);
7745
7746   coding.source = src;
7747   coding.src_chars = src_chars;
7748   coding.src_bytes = src_bytes;
7749   coding.src_multibyte = multibytep;
7750   coding.consumed = 0;
7751   coding.mode |= CODING_MODE_LAST_BLOCK;
7752   coding.head_ascii = 0;
7753
7754   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7755
7756   /* At first, detect text-format if necessary.  */
7757   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7758   if (base_category == coding_category_undecided)
7759     {
7760       enum coding_category category;
7761       struct coding_system *this;
7762       int c, i;
7763
7764       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7765       for (; src < src_end; src++)
7766         {
7767           c = *src;
7768           if (c & 0x80)
7769             {
7770               eight_bit_found = 1;
7771               if (null_byte_found)
7772                 break;
7773             }
7774           else if (c < 0x20)
7775             {
7776               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7777                   && ! inhibit_iso_escape_detection
7778                   && ! detect_info.checked)
7779                 {
7780                   if (detect_coding_iso_2022 (&coding, &detect_info))
7781                     {
7782                       /* We have scanned the whole data.  */
7783                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7784                         {
7785                           /* We didn't find an 8-bit code.  We may
7786                              have found a null-byte, but it's very
7787                              rare that a binary file confirm to
7788                              ISO-2022.  */
7789                           src = src_end;
7790                           coding.head_ascii = src - coding.source;
7791                         }
7792                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
7793                       break;
7794                     }
7795                 }
7796               else if (! c && !inhibit_null_byte_detection)
7797                 {
7798                   null_byte_found = 1;
7799                   if (eight_bit_found)
7800                     break;
7801                 }
7802               if (! eight_bit_found)
7803                 coding.head_ascii++;
7804             }
7805           else if (! eight_bit_found)
7806             coding.head_ascii++;
7807         }
7808
7809       if (null_byte_found || eight_bit_found
7810           || coding.head_ascii < coding.src_bytes
7811           || detect_info.found)
7812         {
7813           if (coding.head_ascii == coding.src_bytes)
7814             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
7815             for (i = 0; i < coding_category_raw_text; i++)
7816               {
7817                 category = coding_priorities[i];
7818                 this = coding_categories + category;
7819                 if (detect_info.found & (1 << category))
7820                   break;
7821               }
7822           else
7823             {
7824               if (null_byte_found)
7825                 {
7826                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
7827                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
7828                 }
7829               for (i = 0; i < coding_category_raw_text; i++)
7830                 {
7831                   category = coding_priorities[i];
7832                   this = coding_categories + category;
7833
7834                   if (this->id < 0)
7835                     {
7836                       /* No coding system of this category is defined.  */
7837                       detect_info.rejected |= (1 << category);
7838                     }
7839                   else if (category >= coding_category_raw_text)
7840                     continue;
7841                   else if (detect_info.checked & (1 << category))
7842                     {
7843                       if (highest
7844                           && (detect_info.found & (1 << category)))
7845                         break;
7846                     }
7847                   else if ((*(this->detector)) (&coding, &detect_info)
7848                            && highest
7849                            && (detect_info.found & (1 << category)))
7850                     {
7851                       if (category == coding_category_utf_16_auto)
7852                         {
7853                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7854                             category = coding_category_utf_16_le;
7855                           else
7856                             category = coding_category_utf_16_be;
7857                         }
7858                       break;
7859                     }
7860                 }
7861             }
7862         }
7863
7864       if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
7865         {
7866           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7867           id = coding_categories[coding_category_raw_text].id;
7868           val = Fcons (make_number (id), Qnil);
7869         }
7870       else if (! detect_info.rejected && ! detect_info.found)
7871         {
7872           detect_info.found = CATEGORY_MASK_ANY;
7873           id = coding_categories[coding_category_undecided].id;
7874           val = Fcons (make_number (id), Qnil);
7875         }
7876       else if (highest)
7877         {
7878           if (detect_info.found)
7879             {
7880               detect_info.found = 1 << category;
7881               val = Fcons (make_number (this->id), Qnil);
7882             }
7883           else
7884             for (i = 0; i < coding_category_raw_text; i++)
7885               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7886                 {
7887                   detect_info.found = 1 << coding_priorities[i];
7888                   id = coding_categories[coding_priorities[i]].id;
7889                   val = Fcons (make_number (id), Qnil);
7890                   break;
7891                 }
7892         }
7893       else
7894         {
7895           int mask = detect_info.rejected | detect_info.found;
7896           int found = 0;
7897
7898           for (i = coding_category_raw_text - 1; i >= 0; i--)
7899             {
7900               category = coding_priorities[i];
7901               if (! (mask & (1 << category)))
7902                 {
7903                   found |= 1 << category;
7904                   id = coding_categories[category].id;
7905                   if (id >= 0)
7906                     val = Fcons (make_number (id), val);
7907                 }
7908             }
7909           for (i = coding_category_raw_text - 1; i >= 0; i--)
7910             {
7911               category = coding_priorities[i];
7912               if (detect_info.found & (1 << category))
7913                 {
7914                   id = coding_categories[category].id;
7915                   val = Fcons (make_number (id), val);
7916                 }
7917             }
7918           detect_info.found |= found;
7919         }
7920     }
7921   else if (base_category == coding_category_utf_8_auto)
7922     {
7923       if (detect_coding_utf_8 (&coding, &detect_info))
7924         {
7925           struct coding_system *this;
7926
7927           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
7928             this = coding_categories + coding_category_utf_8_sig;
7929           else
7930             this = coding_categories + coding_category_utf_8_nosig;
7931           val = Fcons (make_number (this->id), Qnil);
7932         }
7933     }
7934   else if (base_category == coding_category_utf_16_auto)
7935     {
7936       if (detect_coding_utf_16 (&coding, &detect_info))
7937         {
7938           struct coding_system *this;
7939
7940           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7941             this = coding_categories + coding_category_utf_16_le;
7942           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7943             this = coding_categories + coding_category_utf_16_be;
7944           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7945             this = coding_categories + coding_category_utf_16_be_nosig;
7946           else
7947             this = coding_categories + coding_category_utf_16_le_nosig;
7948           val = Fcons (make_number (this->id), Qnil);
7949         }
7950     }
7951   else
7952     {
7953       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7954       val = Fcons (make_number (coding.id), Qnil);
7955     }
7956
7957   /* Then, detect eol-format if necessary.  */
7958   {
7959     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
7960     Lisp_Object tail;
7961
7962     if (VECTORP (eol_type))
7963       {
7964         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7965           {
7966             if (null_byte_found)
7967               normal_eol = EOL_SEEN_LF;
7968             else
7969               normal_eol = detect_eol (coding.source, src_bytes,
7970                                        coding_category_raw_text);
7971           }
7972         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7973                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7974           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7975                                       coding_category_utf_16_be);
7976         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7977                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7978           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7979                                       coding_category_utf_16_le);
7980       }
7981     else
7982       {
7983         if (EQ (eol_type, Qunix))
7984           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7985         else if (EQ (eol_type, Qdos))
7986           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7987         else
7988           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7989       }
7990
7991     for (tail = val; CONSP (tail); tail = XCDR (tail))
7992       {
7993         enum coding_category category;
7994         int this_eol;
7995
7996         id = XINT (XCAR (tail));
7997         attrs = CODING_ID_ATTRS (id);
7998         category = XINT (CODING_ATTR_CATEGORY (attrs));
7999         eol_type = CODING_ID_EOL_TYPE (id);
8000         if (VECTORP (eol_type))
8001           {
8002             if (category == coding_category_utf_16_be
8003                 || category == coding_category_utf_16_be_nosig)
8004               this_eol = utf_16_be_eol;
8005             else if (category == coding_category_utf_16_le
8006                      || category == coding_category_utf_16_le_nosig)
8007               this_eol = utf_16_le_eol;
8008             else
8009               this_eol = normal_eol;
8010
8011             if (this_eol == EOL_SEEN_LF)
8012               XSETCAR (tail, AREF (eol_type, 0));
8013             else if (this_eol == EOL_SEEN_CRLF)
8014               XSETCAR (tail, AREF (eol_type, 1));
8015             else if (this_eol == EOL_SEEN_CR)
8016               XSETCAR (tail, AREF (eol_type, 2));
8017             else
8018               XSETCAR (tail, CODING_ID_NAME (id));
8019           }
8020         else
8021           XSETCAR (tail, CODING_ID_NAME (id));
8022       }
8023   }
8024
8025   return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
8026 }
8027
8028
8029 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8030        2, 3, 0,
8031        doc: /* Detect coding system of the text in the region between START and END.
8032 Return a list of possible coding systems ordered by priority.
8033
8034 If only ASCII characters are found (except for such ISO-2022 control
8035 characters as ESC), it returns a list of single element `undecided'
8036 or its subsidiary coding system according to a detected end-of-line
8037 format.
8038
8039 If optional argument HIGHEST is non-nil, return the coding system of
8040 highest priority.  */)
8041      (start, end, highest)
8042      Lisp_Object start, end, highest;
8043 {
8044   int from, to;
8045   int from_byte, to_byte;
8046
8047   CHECK_NUMBER_COERCE_MARKER (start);
8048   CHECK_NUMBER_COERCE_MARKER (end);
8049
8050   validate_region (&start, &end);
8051   from = XINT (start), to = XINT (end);
8052   from_byte = CHAR_TO_BYTE (from);
8053   to_byte = CHAR_TO_BYTE (to);
8054
8055   if (from < GPT && to >= GPT)
8056     move_gap_both (to, to_byte);
8057
8058   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8059                                to - from, to_byte - from_byte,
8060                                !NILP (highest),
8061                                !NILP (current_buffer
8062                                       ->enable_multibyte_characters),
8063                                Qnil);
8064 }
8065
8066 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8067        1, 2, 0,
8068        doc: /* Detect coding system of the text in STRING.
8069 Return a list of possible coding systems ordered by priority.
8070
8071 If only ASCII characters are found (except for such ISO-2022 control
8072 characters as ESC), it returns a list of single element `undecided'
8073 or its subsidiary coding system according to a detected end-of-line
8074 format.
8075
8076 If optional argument HIGHEST is non-nil, return the coding system of
8077 highest priority.  */)
8078      (string, highest)
8079      Lisp_Object string, highest;
8080 {
8081   CHECK_STRING (string);
8082
8083   return detect_coding_system (SDATA (string),
8084                                SCHARS (string), SBYTES (string),
8085                                !NILP (highest), STRING_MULTIBYTE (string),
8086                                Qnil);
8087 }
8088
8089
8090 static INLINE int
8091 char_encodable_p (c, attrs)
8092      int c;
8093      Lisp_Object attrs;
8094 {
8095   Lisp_Object tail;
8096   struct charset *charset;
8097   Lisp_Object translation_table;
8098
8099   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8100   if (! NILP (translation_table))
8101     c = translate_char (translation_table, c);
8102   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8103        CONSP (tail); tail = XCDR (tail))
8104     {
8105       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8106       if (CHAR_CHARSET_P (c, charset))
8107         break;
8108     }
8109   return (! NILP (tail));
8110 }
8111
8112
8113 /* Return a list of coding systems that safely encode the text between
8114    START and END.  If EXCLUDE is non-nil, it is a list of coding
8115    systems not to check.  The returned list doesn't contain any such
8116    coding systems.  In any case, if the text contains only ASCII or is
8117    unibyte, return t.  */
8118
8119 DEFUN ("find-coding-systems-region-internal",
8120        Ffind_coding_systems_region_internal,
8121        Sfind_coding_systems_region_internal, 2, 3, 0,
8122        doc: /* Internal use only.  */)
8123      (start, end, exclude)
8124      Lisp_Object start, end, exclude;
8125 {
8126   Lisp_Object coding_attrs_list, safe_codings;
8127   EMACS_INT start_byte, end_byte;
8128   const unsigned char *p, *pbeg, *pend;
8129   int c;
8130   Lisp_Object tail, elt;
8131
8132   if (STRINGP (start))
8133     {
8134       if (!STRING_MULTIBYTE (start)
8135           || SCHARS (start) == SBYTES (start))
8136         return Qt;
8137       start_byte = 0;
8138       end_byte = SBYTES (start);
8139     }
8140   else
8141     {
8142       CHECK_NUMBER_COERCE_MARKER (start);
8143       CHECK_NUMBER_COERCE_MARKER (end);
8144       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8145         args_out_of_range (start, end);
8146       if (NILP (current_buffer->enable_multibyte_characters))
8147         return Qt;
8148       start_byte = CHAR_TO_BYTE (XINT (start));
8149       end_byte = CHAR_TO_BYTE (XINT (end));
8150       if (XINT (end) - XINT (start) == end_byte - start_byte)
8151         return Qt;
8152
8153       if (XINT (start) < GPT && XINT (end) > GPT)
8154         {
8155           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8156             move_gap_both (XINT (start), start_byte);
8157           else
8158             move_gap_both (XINT (end), end_byte);
8159         }
8160     }
8161
8162   coding_attrs_list = Qnil;
8163   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8164     if (NILP (exclude)
8165         || NILP (Fmemq (XCAR (tail), exclude)))
8166       {
8167         Lisp_Object attrs;
8168
8169         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8170         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8171             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8172           {
8173             ASET (attrs, coding_attr_trans_tbl,
8174                   get_translation_table (attrs, 1, NULL));
8175             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8176           }
8177       }
8178
8179   if (STRINGP (start))
8180     p = pbeg = SDATA (start);
8181   else
8182     p = pbeg = BYTE_POS_ADDR (start_byte);
8183   pend = p + (end_byte - start_byte);
8184
8185   while (p < pend && ASCII_BYTE_P (*p)) p++;
8186   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8187
8188   while (p < pend)
8189     {
8190       if (ASCII_BYTE_P (*p))
8191         p++;
8192       else
8193         {
8194           c = STRING_CHAR_ADVANCE (p);
8195
8196           charset_map_loaded = 0;
8197           for (tail = coding_attrs_list; CONSP (tail);)
8198             {
8199               elt = XCAR (tail);
8200               if (NILP (elt))
8201                 tail = XCDR (tail);
8202               else if (char_encodable_p (c, elt))
8203                 tail = XCDR (tail);
8204               else if (CONSP (XCDR (tail)))
8205                 {
8206                   XSETCAR (tail, XCAR (XCDR (tail)));
8207                   XSETCDR (tail, XCDR (XCDR (tail)));
8208                 }
8209               else
8210                 {
8211                   XSETCAR (tail, Qnil);
8212                   tail = XCDR (tail);
8213                 }
8214             }
8215           if (charset_map_loaded)
8216             {
8217               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8218
8219               if (STRINGP (start))
8220                 pbeg = SDATA (start);
8221               else
8222                 pbeg = BYTE_POS_ADDR (start_byte);
8223               p = pbeg + p_offset;
8224               pend = pbeg + pend_offset;
8225             }
8226         }
8227     }
8228
8229   safe_codings = list2 (Qraw_text, Qno_conversion);
8230   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8231     if (! NILP (XCAR (tail)))
8232       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8233
8234   return safe_codings;
8235 }
8236
8237
8238 DEFUN ("unencodable-char-position", Funencodable_char_position,
8239        Sunencodable_char_position, 3, 5, 0,
8240        doc: /*
8241 Return position of first un-encodable character in a region.
8242 START and END specify the region and CODING-SYSTEM specifies the
8243 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8244
8245 If optional 4th argument COUNT is non-nil, it specifies at most how
8246 many un-encodable characters to search.  In this case, the value is a
8247 list of positions.
8248
8249 If optional 5th argument STRING is non-nil, it is a string to search
8250 for un-encodable characters.  In that case, START and END are indexes
8251 to the string.  */)
8252      (start, end, coding_system, count, string)
8253      Lisp_Object start, end, coding_system, count, string;
8254 {
8255   int n;
8256   struct coding_system coding;
8257   Lisp_Object attrs, charset_list, translation_table;
8258   Lisp_Object positions;
8259   int from, to;
8260   const unsigned char *p, *stop, *pend;
8261   int ascii_compatible;
8262
8263   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8264   attrs = CODING_ID_ATTRS (coding.id);
8265   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8266     return Qnil;
8267   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8268   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8269   translation_table = get_translation_table (attrs, 1, NULL);
8270
8271   if (NILP (string))
8272     {
8273       validate_region (&start, &end);
8274       from = XINT (start);
8275       to = XINT (end);
8276       if (NILP (current_buffer->enable_multibyte_characters)
8277           || (ascii_compatible
8278               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8279         return Qnil;
8280       p = CHAR_POS_ADDR (from);
8281       pend = CHAR_POS_ADDR (to);
8282       if (from < GPT && to >= GPT)
8283         stop = GPT_ADDR;
8284       else
8285         stop = pend;
8286     }
8287   else
8288     {
8289       CHECK_STRING (string);
8290       CHECK_NATNUM (start);
8291       CHECK_NATNUM (end);
8292       from = XINT (start);
8293       to = XINT (end);
8294       if (from > to
8295           || to > SCHARS (string))
8296         args_out_of_range_3 (string, start, end);
8297       if (! STRING_MULTIBYTE (string))
8298         return Qnil;
8299       p = SDATA (string) + string_char_to_byte (string, from);
8300       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8301       if (ascii_compatible && (to - from) == (pend - p))
8302         return Qnil;
8303     }
8304
8305   if (NILP (count))
8306     n = 1;
8307   else
8308     {
8309       CHECK_NATNUM (count);
8310       n = XINT (count);
8311     }
8312
8313   positions = Qnil;
8314   while (1)
8315     {
8316       int c;
8317
8318       if (ascii_compatible)
8319         while (p < stop && ASCII_BYTE_P (*p))
8320           p++, from++;
8321       if (p >= stop)
8322         {
8323           if (p >= pend)
8324             break;
8325           stop = pend;
8326           p = GAP_END_ADDR;
8327         }
8328
8329       c = STRING_CHAR_ADVANCE (p);
8330       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8331           && ! char_charset (translate_char (translation_table, c),
8332                              charset_list, NULL))
8333         {
8334           positions = Fcons (make_number (from), positions);
8335           n--;
8336           if (n == 0)
8337             break;
8338         }
8339
8340       from++;
8341     }
8342
8343   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8344 }
8345
8346
8347 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8348        Scheck_coding_systems_region, 3, 3, 0,
8349        doc: /* Check if the region is encodable by coding systems.
8350
8351 START and END are buffer positions specifying the region.
8352 CODING-SYSTEM-LIST is a list of coding systems to check.
8353
8354 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8355 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8356 whole region, POS0, POS1, ... are buffer positions where non-encodable
8357 characters are found.
8358
8359 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8360 value is nil.
8361
8362 START may be a string.  In that case, check if the string is
8363 encodable, and the value contains indices to the string instead of
8364 buffer positions.  END is ignored.  */)
8365      (start, end, coding_system_list)
8366      Lisp_Object start, end, coding_system_list;
8367 {
8368   Lisp_Object list;
8369   EMACS_INT start_byte, end_byte;
8370   int pos;
8371   const unsigned char *p, *pbeg, *pend;
8372   int c;
8373   Lisp_Object tail, elt, attrs;
8374
8375   if (STRINGP (start))
8376     {
8377       if (!STRING_MULTIBYTE (start)
8378           && SCHARS (start) != SBYTES (start))
8379         return Qnil;
8380       start_byte = 0;
8381       end_byte = SBYTES (start);
8382       pos = 0;
8383     }
8384   else
8385     {
8386       CHECK_NUMBER_COERCE_MARKER (start);
8387       CHECK_NUMBER_COERCE_MARKER (end);
8388       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8389         args_out_of_range (start, end);
8390       if (NILP (current_buffer->enable_multibyte_characters))
8391         return Qnil;
8392       start_byte = CHAR_TO_BYTE (XINT (start));
8393       end_byte = CHAR_TO_BYTE (XINT (end));
8394       if (XINT (end) - XINT (start) == end_byte - start_byte)
8395         return Qt;
8396
8397       if (XINT (start) < GPT && XINT (end) > GPT)
8398         {
8399           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8400             move_gap_both (XINT (start), start_byte);
8401           else
8402             move_gap_both (XINT (end), end_byte);
8403         }
8404       pos = XINT (start);
8405     }
8406
8407   list = Qnil;
8408   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8409     {
8410       elt = XCAR (tail);
8411       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8412       ASET (attrs, coding_attr_trans_tbl,
8413             get_translation_table (attrs, 1, NULL));
8414       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8415     }
8416
8417   if (STRINGP (start))
8418     p = pbeg = SDATA (start);
8419   else
8420     p = pbeg = BYTE_POS_ADDR (start_byte);
8421   pend = p + (end_byte - start_byte);
8422
8423   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8424   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8425
8426   while (p < pend)
8427     {
8428       if (ASCII_BYTE_P (*p))
8429         p++;
8430       else
8431         {
8432           c = STRING_CHAR_ADVANCE (p);
8433
8434           charset_map_loaded = 0;
8435           for (tail = list; CONSP (tail); tail = XCDR (tail))
8436             {
8437               elt = XCDR (XCAR (tail));
8438               if (! char_encodable_p (c, XCAR (elt)))
8439                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8440             }
8441           if (charset_map_loaded)
8442             {
8443               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8444
8445               if (STRINGP (start))
8446                 pbeg = SDATA (start);
8447               else
8448                 pbeg = BYTE_POS_ADDR (start_byte);
8449               p = pbeg + p_offset;
8450               pend = pbeg + pend_offset;
8451             }
8452         }
8453       pos++;
8454     }
8455
8456   tail = list;
8457   list = Qnil;
8458   for (; CONSP (tail); tail = XCDR (tail))
8459     {
8460       elt = XCAR (tail);
8461       if (CONSP (XCDR (XCDR (elt))))
8462         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8463                       list);
8464     }
8465
8466   return list;
8467 }
8468
8469
8470 Lisp_Object
8471 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8472      Lisp_Object start, end, coding_system, dst_object;
8473      int encodep, norecord;
8474 {
8475   struct coding_system coding;
8476   EMACS_INT from, from_byte, to, to_byte;
8477   Lisp_Object src_object;
8478
8479   CHECK_NUMBER_COERCE_MARKER (start);
8480   CHECK_NUMBER_COERCE_MARKER (end);
8481   if (NILP (coding_system))
8482     coding_system = Qno_conversion;
8483   else
8484     CHECK_CODING_SYSTEM (coding_system);
8485   src_object = Fcurrent_buffer ();
8486   if (NILP (dst_object))
8487     dst_object = src_object;
8488   else if (! EQ (dst_object, Qt))
8489     CHECK_BUFFER (dst_object);
8490
8491   validate_region (&start, &end);
8492   from = XFASTINT (start);
8493   from_byte = CHAR_TO_BYTE (from);
8494   to = XFASTINT (end);
8495   to_byte = CHAR_TO_BYTE (to);
8496
8497   setup_coding_system (coding_system, &coding);
8498   coding.mode |= CODING_MODE_LAST_BLOCK;
8499
8500   if (encodep)
8501     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8502                           dst_object);
8503   else
8504     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8505                           dst_object);
8506   if (! norecord)
8507     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8508
8509   return (BUFFERP (dst_object)
8510           ? make_number (coding.produced_char)
8511           : coding.dst_object);
8512 }
8513
8514
8515 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8516        3, 4, "r\nzCoding system: ",
8517        doc: /* Decode the current region from the specified coding system.
8518 When called from a program, takes four arguments:
8519         START, END, CODING-SYSTEM, and DESTINATION.
8520 START and END are buffer positions.
8521
8522 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8523 If nil, the region between START and END is replaced by the decoded text.
8524 If buffer, the decoded text is inserted in that buffer after point (point
8525 does not move).
8526 In those cases, the length of the decoded text is returned.
8527 If DESTINATION is t, the decoded text is returned.
8528
8529 This function sets `last-coding-system-used' to the precise coding system
8530 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8531 not fully specified.)  */)
8532      (start, end, coding_system, destination)
8533      Lisp_Object start, end, coding_system, destination;
8534 {
8535   return code_convert_region (start, end, coding_system, destination, 0, 0);
8536 }
8537
8538 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8539        3, 4, "r\nzCoding system: ",
8540        doc: /* Encode the current region by specified coding system.
8541 When called from a program, takes four arguments:
8542         START, END, CODING-SYSTEM and DESTINATION.
8543 START and END are buffer positions.
8544
8545 Optional 4th arguments DESTINATION specifies where the encoded text goes.
8546 If nil, the region between START and END is replace by the encoded text.
8547 If buffer, the encoded text is inserted in that buffer after point (point
8548 does not move).
8549 In those cases, the length of the encoded text is returned.
8550 If DESTINATION is t, the encoded text is returned.
8551
8552 This function sets `last-coding-system-used' to the precise coding system
8553 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8554 not fully specified.)  */)
8555   (start, end, coding_system, destination)
8556      Lisp_Object start, end, coding_system, destination;
8557 {
8558   return code_convert_region (start, end, coding_system, destination, 1, 0);
8559 }
8560
8561 Lisp_Object
8562 code_convert_string (string, coding_system, dst_object,
8563                      encodep, nocopy, norecord)
8564      Lisp_Object string, coding_system, dst_object;
8565      int encodep, nocopy, norecord;
8566 {
8567   struct coding_system coding;
8568   EMACS_INT chars, bytes;
8569
8570   CHECK_STRING (string);
8571   if (NILP (coding_system))
8572     {
8573       if (! norecord)
8574         Vlast_coding_system_used = Qno_conversion;
8575       if (NILP (dst_object))
8576         return (nocopy ? Fcopy_sequence (string) : string);
8577     }
8578
8579   if (NILP (coding_system))
8580     coding_system = Qno_conversion;
8581   else
8582     CHECK_CODING_SYSTEM (coding_system);
8583   if (NILP (dst_object))
8584     dst_object = Qt;
8585   else if (! EQ (dst_object, Qt))
8586     CHECK_BUFFER (dst_object);
8587
8588   setup_coding_system (coding_system, &coding);
8589   coding.mode |= CODING_MODE_LAST_BLOCK;
8590   chars = SCHARS (string);
8591   bytes = SBYTES (string);
8592   if (encodep)
8593     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8594   else
8595     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8596   if (! norecord)
8597     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8598
8599   return (BUFFERP (dst_object)
8600           ? make_number (coding.produced_char)
8601           : coding.dst_object);
8602 }
8603
8604
8605 /* Encode or decode STRING according to CODING_SYSTEM.
8606    Do not set Vlast_coding_system_used.
8607
8608    This function is called only from macros DECODE_FILE and
8609    ENCODE_FILE, thus we ignore character composition.  */
8610
8611 Lisp_Object
8612 code_convert_string_norecord (string, coding_system, encodep)
8613      Lisp_Object string, coding_system;
8614      int encodep;
8615 {
8616   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
8617 }
8618
8619
8620 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8621        2, 4, 0,
8622        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8623
8624 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8625 if the decoding operation is trivial.
8626
8627 Optional fourth arg BUFFER non-nil means that the decoded text is
8628 inserted in that buffer after point (point does not move).  In this
8629 case, the return value is the length of the decoded text.
8630
8631 This function sets `last-coding-system-used' to the precise coding system
8632 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8633 not fully specified.)  */)
8634   (string, coding_system, nocopy, buffer)
8635      Lisp_Object string, coding_system, nocopy, buffer;
8636 {
8637   return code_convert_string (string, coding_system, buffer,
8638                               0, ! NILP (nocopy), 0);
8639 }
8640
8641 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8642        2, 4, 0,
8643        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8644
8645 Optional third arg NOCOPY non-nil means it is OK to return STRING
8646 itself if the encoding operation is trivial.
8647
8648 Optional fourth arg BUFFER non-nil means that the encoded text is
8649 inserted in that buffer after point (point does not move).  In this
8650 case, the return value is the length of the encoded text.
8651
8652 This function sets `last-coding-system-used' to the precise coding system
8653 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8654 not fully specified.)  */)
8655      (string, coding_system, nocopy, buffer)
8656      Lisp_Object string, coding_system, nocopy, buffer;
8657 {
8658   return code_convert_string (string, coding_system, buffer,
8659                               1, ! NILP (nocopy), 1);
8660 }
8661
8662 \f
8663 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
8664        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8665 Return the corresponding character.  */)
8666      (code)
8667      Lisp_Object code;
8668 {
8669   Lisp_Object spec, attrs, val;
8670   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8671   int c;
8672
8673   CHECK_NATNUM (code);
8674   c = XFASTINT (code);
8675   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8676   attrs = AREF (spec, 0);
8677
8678   if (ASCII_BYTE_P (c)
8679       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8680     return code;
8681
8682   val = CODING_ATTR_CHARSET_LIST (attrs);
8683   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8684   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8685   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
8686
8687   if (c <= 0x7F)
8688     charset = charset_roman;
8689   else if (c >= 0xA0 && c < 0xDF)
8690     {
8691       charset = charset_kana;
8692       c -= 0x80;
8693     }
8694   else
8695     {
8696       int s1 = c >> 8, s2 = c & 0xFF;
8697
8698       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8699           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8700         error ("Invalid code: %d", code);
8701       SJIS_TO_JIS (c);
8702       charset = charset_kanji;
8703     }
8704   c = DECODE_CHAR (charset, c);
8705   if (c < 0)
8706     error ("Invalid code: %d", code);
8707   return make_number (c);
8708 }
8709
8710
8711 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8712        doc: /* Encode a Japanese character CH to shift_jis encoding.
8713 Return the corresponding code in SJIS.  */)
8714      (ch)
8715     Lisp_Object ch;
8716 {
8717   Lisp_Object spec, attrs, charset_list;
8718   int c;
8719   struct charset *charset;
8720   unsigned code;
8721
8722   CHECK_CHARACTER (ch);
8723   c = XFASTINT (ch);
8724   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8725   attrs = AREF (spec, 0);
8726
8727   if (ASCII_CHAR_P (c)
8728       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8729     return ch;
8730
8731   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8732   charset = char_charset (c, charset_list, &code);
8733   if (code == CHARSET_INVALID_CODE (charset))
8734     error ("Can't encode by shift_jis encoding: %d", c);
8735   JIS_TO_SJIS (code);
8736
8737   return make_number (code);
8738 }
8739
8740 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8741        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8742 Return the corresponding character.  */)
8743      (code)
8744      Lisp_Object code;
8745 {
8746   Lisp_Object spec, attrs, val;
8747   struct charset *charset_roman, *charset_big5, *charset;
8748   int c;
8749
8750   CHECK_NATNUM (code);
8751   c = XFASTINT (code);
8752   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8753   attrs = AREF (spec, 0);
8754
8755   if (ASCII_BYTE_P (c)
8756       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8757     return code;
8758
8759   val = CODING_ATTR_CHARSET_LIST (attrs);
8760   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8761   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8762
8763   if (c <= 0x7F)
8764     charset = charset_roman;
8765   else
8766     {
8767       int b1 = c >> 8, b2 = c & 0x7F;
8768       if (b1 < 0xA1 || b1 > 0xFE
8769           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8770         error ("Invalid code: %d", code);
8771       charset = charset_big5;
8772     }
8773   c = DECODE_CHAR (charset, (unsigned )c);
8774   if (c < 0)
8775     error ("Invalid code: %d", code);
8776   return make_number (c);
8777 }
8778
8779 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8780        doc: /* Encode the Big5 character CH to BIG5 coding system.
8781 Return the corresponding character code in Big5.  */)
8782      (ch)
8783      Lisp_Object ch;
8784 {
8785   Lisp_Object spec, attrs, charset_list;
8786   struct charset *charset;
8787   int c;
8788   unsigned code;
8789
8790   CHECK_CHARACTER (ch);
8791   c = XFASTINT (ch);
8792   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8793   attrs = AREF (spec, 0);
8794   if (ASCII_CHAR_P (c)
8795       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8796     return ch;
8797
8798   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8799   charset = char_charset (c, charset_list, &code);
8800   if (code == CHARSET_INVALID_CODE (charset))
8801     error ("Can't encode by Big5 encoding: %d", c);
8802
8803   return make_number (code);
8804 }
8805
8806 \f
8807 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
8808        Sset_terminal_coding_system_internal, 1, 2, 0,
8809        doc: /* Internal use only.  */)
8810      (coding_system, terminal)
8811      Lisp_Object coding_system;
8812      Lisp_Object terminal;
8813 {
8814   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8815   CHECK_SYMBOL (coding_system);
8816   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
8817   /* We had better not send unsafe characters to terminal.  */
8818   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
8819   /* Characer composition should be disabled.  */
8820   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8821   terminal_coding->src_multibyte = 1;
8822   terminal_coding->dst_multibyte = 0;
8823   return Qnil;
8824 }
8825
8826 DEFUN ("set-safe-terminal-coding-system-internal",
8827        Fset_safe_terminal_coding_system_internal,
8828        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8829        doc: /* Internal use only.  */)
8830      (coding_system)
8831      Lisp_Object coding_system;
8832 {
8833   CHECK_SYMBOL (coding_system);
8834   setup_coding_system (Fcheck_coding_system (coding_system),
8835                        &safe_terminal_coding);
8836   /* Characer composition should be disabled.  */
8837   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8838   safe_terminal_coding.src_multibyte = 1;
8839   safe_terminal_coding.dst_multibyte = 0;
8840   return Qnil;
8841 }
8842
8843 DEFUN ("terminal-coding-system", Fterminal_coding_system,
8844        Sterminal_coding_system, 0, 1, 0,
8845        doc: /* Return coding system specified for terminal output on the given terminal.
8846 TERMINAL may be a terminal id, a frame, or nil for the selected
8847 frame's terminal device.  */)
8848      (terminal)
8849      Lisp_Object terminal;
8850 {
8851   struct coding_system *terminal_coding
8852     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8853   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
8854
8855   /* For backward compatibility, return nil if it is `undecided'. */
8856   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
8857 }
8858
8859 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
8860        Sset_keyboard_coding_system_internal, 1, 2, 0,
8861        doc: /* Internal use only.  */)
8862      (coding_system, terminal)
8863      Lisp_Object coding_system;
8864      Lisp_Object terminal;
8865 {
8866   struct terminal *t = get_terminal (terminal, 1);
8867   CHECK_SYMBOL (coding_system);
8868   setup_coding_system (Fcheck_coding_system (coding_system),
8869                        TERMINAL_KEYBOARD_CODING (t));
8870   /* Characer composition should be disabled.  */
8871   TERMINAL_KEYBOARD_CODING (t)->common_flags
8872     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8873   return Qnil;
8874 }
8875
8876 DEFUN ("keyboard-coding-system",
8877        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
8878        doc: /* Return coding system specified for decoding keyboard input.  */)
8879      (terminal)
8880      Lisp_Object terminal;
8881 {
8882   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8883                          (get_terminal (terminal, 1))->id);
8884 }
8885
8886 \f
8887 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8888        Sfind_operation_coding_system,  1, MANY, 0,
8889        doc: /* Choose a coding system for an operation based on the target name.
8890 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8891 DECODING-SYSTEM is the coding system to use for decoding
8892 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8893 for encoding (in case OPERATION does encoding).
8894
8895 The first argument OPERATION specifies an I/O primitive:
8896   For file I/O, `insert-file-contents' or `write-region'.
8897   For process I/O, `call-process', `call-process-region', or `start-process'.
8898   For network I/O, `open-network-stream'.
8899
8900 The remaining arguments should be the same arguments that were passed
8901 to the primitive.  Depending on which primitive, one of those arguments
8902 is selected as the TARGET.  For example, if OPERATION does file I/O,
8903 whichever argument specifies the file name is TARGET.
8904
8905 TARGET has a meaning which depends on OPERATION:
8906   For file I/O, TARGET is a file name (except for the special case below).
8907   For process I/O, TARGET is a process name.
8908   For network I/O, TARGET is a service name or a port number.
8909
8910 This function looks up what is specified for TARGET in
8911 `file-coding-system-alist', `process-coding-system-alist',
8912 or `network-coding-system-alist' depending on OPERATION.
8913 They may specify a coding system, a cons of coding systems,
8914 or a function symbol to call.
8915 In the last case, we call the function with one argument,
8916 which is a list of all the arguments given to this function.
8917 If the function can't decide a coding system, it can return
8918 `undecided' so that the normal code-detection is performed.
8919
8920 If OPERATION is `insert-file-contents', the argument corresponding to
8921 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
8922 file name to look up, and BUFFER is a buffer that contains the file's
8923 contents (not yet decoded).  If `file-coding-system-alist' specifies a
8924 function to call for FILENAME, that function should examine the
8925 contents of BUFFER instead of reading the file.
8926
8927 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
8928      (nargs, args)
8929      int nargs;
8930      Lisp_Object *args;
8931 {
8932   Lisp_Object operation, target_idx, target, val;
8933   register Lisp_Object chain;
8934
8935   if (nargs < 2)
8936     error ("Too few arguments");
8937   operation = args[0];
8938   if (!SYMBOLP (operation)
8939       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8940     error ("Invalid first argument");
8941   if (nargs < 1 + XINT (target_idx))
8942     error ("Too few arguments for operation: %s",
8943            SDATA (SYMBOL_NAME (operation)));
8944   target = args[XINT (target_idx) + 1];
8945   if (!(STRINGP (target)
8946         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8947             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
8948         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8949     error ("Invalid %dth argument", XINT (target_idx) + 1);
8950   if (CONSP (target))
8951     target = XCAR (target);
8952
8953   chain = ((EQ (operation, Qinsert_file_contents)
8954             || EQ (operation, Qwrite_region))
8955            ? Vfile_coding_system_alist
8956            : (EQ (operation, Qopen_network_stream)
8957               ? Vnetwork_coding_system_alist
8958               : Vprocess_coding_system_alist));
8959   if (NILP (chain))
8960     return Qnil;
8961
8962   for (; CONSP (chain); chain = XCDR (chain))
8963     {
8964       Lisp_Object elt;
8965
8966       elt = XCAR (chain);
8967       if (CONSP (elt)
8968           && ((STRINGP (target)
8969                && STRINGP (XCAR (elt))
8970                && fast_string_match (XCAR (elt), target) >= 0)
8971               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8972         {
8973           val = XCDR (elt);
8974           /* Here, if VAL is both a valid coding system and a valid
8975              function symbol, we return VAL as a coding system.  */
8976           if (CONSP (val))
8977             return val;
8978           if (! SYMBOLP (val))
8979             return Qnil;
8980           if (! NILP (Fcoding_system_p (val)))
8981             return Fcons (val, val);
8982           if (! NILP (Ffboundp (val)))
8983             {
8984               /* We use call1 rather than safe_call1
8985                  so as to get bug reports about functions called here
8986                  which don't handle the current interface.  */
8987               val = call1 (val, Flist (nargs, args));
8988               if (CONSP (val))
8989                 return val;
8990               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8991                 return Fcons (val, val);
8992             }
8993           return Qnil;
8994         }
8995     }
8996   return Qnil;
8997 }
8998
8999 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9000        Sset_coding_system_priority, 0, MANY, 0,
9001        doc: /* Assign higher priority to the coding systems given as arguments.
9002 If multiple coding systems belong to the same category,
9003 all but the first one are ignored.
9004
9005 usage: (set-coding-system-priority &rest coding-systems)  */)
9006      (nargs, args)
9007      int nargs;
9008      Lisp_Object *args;
9009 {
9010   int i, j;
9011   int changed[coding_category_max];
9012   enum coding_category priorities[coding_category_max];
9013
9014   bzero (changed, sizeof changed);
9015
9016   for (i = j = 0; i < nargs; i++)
9017     {
9018       enum coding_category category;
9019       Lisp_Object spec, attrs;
9020
9021       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9022       attrs = AREF (spec, 0);
9023       category = XINT (CODING_ATTR_CATEGORY (attrs));
9024       if (changed[category])
9025         /* Ignore this coding system because a coding system of the
9026            same category already had a higher priority.  */
9027         continue;
9028       changed[category] = 1;
9029       priorities[j++] = category;
9030       if (coding_categories[category].id >= 0
9031           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9032         setup_coding_system (args[i], &coding_categories[category]);
9033       Fset (AREF (Vcoding_category_table, category), args[i]);
9034     }
9035
9036   /* Now we have decided top J priorities.  Reflect the order of the
9037      original priorities to the remaining priorities.  */
9038
9039   for (i = j, j = 0; i < coding_category_max; i++, j++)
9040     {
9041       while (j < coding_category_max
9042              && changed[coding_priorities[j]])
9043         j++;
9044       if (j == coding_category_max)
9045         abort ();
9046       priorities[i] = coding_priorities[j];
9047     }
9048
9049   bcopy (priorities, coding_priorities, sizeof priorities);
9050
9051   /* Update `coding-category-list'.  */
9052   Vcoding_category_list = Qnil;
9053   for (i = coding_category_max - 1; i >= 0; i--)
9054     Vcoding_category_list
9055       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9056                Vcoding_category_list);
9057
9058   return Qnil;
9059 }
9060
9061 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9062        Scoding_system_priority_list, 0, 1, 0,
9063        doc: /* Return a list of coding systems ordered by their priorities.
9064 HIGHESTP non-nil means just return the highest priority one.  */)
9065      (highestp)
9066      Lisp_Object highestp;
9067 {
9068   int i;
9069   Lisp_Object val;
9070
9071   for (i = 0, val = Qnil; i < coding_category_max; i++)
9072     {
9073       enum coding_category category = coding_priorities[i];
9074       int id = coding_categories[category].id;
9075       Lisp_Object attrs;
9076
9077       if (id < 0)
9078         continue;
9079       attrs = CODING_ID_ATTRS (id);
9080       if (! NILP (highestp))
9081         return CODING_ATTR_BASE_NAME (attrs);
9082       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9083     }
9084   return Fnreverse (val);
9085 }
9086
9087 static char *suffixes[] = { "-unix", "-dos", "-mac" };
9088
9089 static Lisp_Object
9090 make_subsidiaries (base)
9091      Lisp_Object base;
9092 {
9093   Lisp_Object subsidiaries;
9094   int base_name_len = SBYTES (SYMBOL_NAME (base));
9095   char *buf = (char *) alloca (base_name_len + 6);
9096   int i;
9097
9098   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
9099   subsidiaries = Fmake_vector (make_number (3), Qnil);
9100   for (i = 0; i < 3; i++)
9101     {
9102       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
9103       ASET (subsidiaries, i, intern (buf));
9104     }
9105   return subsidiaries;
9106 }
9107
9108
9109 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9110        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9111        doc: /* For internal use only.
9112 usage: (define-coding-system-internal ...)  */)
9113      (nargs, args)
9114      int nargs;
9115      Lisp_Object *args;
9116 {
9117   Lisp_Object name;
9118   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9119   Lisp_Object attrs;            /* Vector of attributes.  */
9120   Lisp_Object eol_type;
9121   Lisp_Object aliases;
9122   Lisp_Object coding_type, charset_list, safe_charsets;
9123   enum coding_category category;
9124   Lisp_Object tail, val;
9125   int max_charset_id = 0;
9126   int i;
9127
9128   if (nargs < coding_arg_max)
9129     goto short_args;
9130
9131   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9132
9133   name = args[coding_arg_name];
9134   CHECK_SYMBOL (name);
9135   CODING_ATTR_BASE_NAME (attrs) = name;
9136
9137   val = args[coding_arg_mnemonic];
9138   if (! STRINGP (val))
9139     CHECK_CHARACTER (val);
9140   CODING_ATTR_MNEMONIC (attrs) = val;
9141
9142   coding_type = args[coding_arg_coding_type];
9143   CHECK_SYMBOL (coding_type);
9144   CODING_ATTR_TYPE (attrs) = coding_type;
9145
9146   charset_list = args[coding_arg_charset_list];
9147   if (SYMBOLP (charset_list))
9148     {
9149       if (EQ (charset_list, Qiso_2022))
9150         {
9151           if (! EQ (coding_type, Qiso_2022))
9152             error ("Invalid charset-list");
9153           charset_list = Viso_2022_charset_list;
9154         }
9155       else if (EQ (charset_list, Qemacs_mule))
9156         {
9157           if (! EQ (coding_type, Qemacs_mule))
9158             error ("Invalid charset-list");
9159           charset_list = Vemacs_mule_charset_list;
9160         }
9161       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9162         if (max_charset_id < XFASTINT (XCAR (tail)))
9163           max_charset_id = XFASTINT (XCAR (tail));
9164     }
9165   else
9166     {
9167       charset_list = Fcopy_sequence (charset_list);
9168       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9169         {
9170           struct charset *charset;
9171
9172           val = XCAR (tail);
9173           CHECK_CHARSET_GET_CHARSET (val, charset);
9174           if (EQ (coding_type, Qiso_2022)
9175               ? CHARSET_ISO_FINAL (charset) < 0
9176               : EQ (coding_type, Qemacs_mule)
9177               ? CHARSET_EMACS_MULE_ID (charset) < 0
9178               : 0)
9179             error ("Can't handle charset `%s'",
9180                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9181
9182           XSETCAR (tail, make_number (charset->id));
9183           if (max_charset_id < charset->id)
9184             max_charset_id = charset->id;
9185         }
9186     }
9187   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9188
9189   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
9190                                 make_number (255));
9191   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9192     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9193   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9194
9195   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9196
9197   val = args[coding_arg_decode_translation_table];
9198   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9199     CHECK_SYMBOL (val);
9200   CODING_ATTR_DECODE_TBL (attrs) = val;
9201
9202   val = args[coding_arg_encode_translation_table];
9203   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9204     CHECK_SYMBOL (val);
9205   CODING_ATTR_ENCODE_TBL (attrs) = val;
9206
9207   val = args[coding_arg_post_read_conversion];
9208   CHECK_SYMBOL (val);
9209   CODING_ATTR_POST_READ (attrs) = val;
9210
9211   val = args[coding_arg_pre_write_conversion];
9212   CHECK_SYMBOL (val);
9213   CODING_ATTR_PRE_WRITE (attrs) = val;
9214
9215   val = args[coding_arg_default_char];
9216   if (NILP (val))
9217     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9218   else
9219     {
9220       CHECK_CHARACTER (val);
9221       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9222     }
9223
9224   val = args[coding_arg_for_unibyte];
9225   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9226
9227   val = args[coding_arg_plist];
9228   CHECK_LIST (val);
9229   CODING_ATTR_PLIST (attrs) = val;
9230
9231   if (EQ (coding_type, Qcharset))
9232     {
9233       /* Generate a lisp vector of 256 elements.  Each element is nil,
9234          integer, or a list of charset IDs.
9235
9236          If Nth element is nil, the byte code N is invalid in this
9237          coding system.
9238
9239          If Nth element is a number NUM, N is the first byte of a
9240          charset whose ID is NUM.
9241
9242          If Nth element is a list of charset IDs, N is the first byte
9243          of one of them.  The list is sorted by dimensions of the
9244          charsets.  A charset of smaller dimension comes firtst. */
9245       val = Fmake_vector (make_number (256), Qnil);
9246
9247       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9248         {
9249           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9250           int dim = CHARSET_DIMENSION (charset);
9251           int idx = (dim - 1) * 4;
9252
9253           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9254             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9255
9256           for (i = charset->code_space[idx];
9257                i <= charset->code_space[idx + 1]; i++)
9258             {
9259               Lisp_Object tmp, tmp2;
9260               int dim2;
9261
9262               tmp = AREF (val, i);
9263               if (NILP (tmp))
9264                 tmp = XCAR (tail);
9265               else if (NUMBERP (tmp))
9266                 {
9267                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9268                   if (dim < dim2)
9269                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9270                   else
9271                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9272                 }
9273               else
9274                 {
9275                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9276                     {
9277                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9278                       if (dim < dim2)
9279                         break;
9280                     }
9281                   if (NILP (tmp2))
9282                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9283                   else
9284                     {
9285                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9286                       XSETCAR (tmp2, XCAR (tail));
9287                     }
9288                 }
9289               ASET (val, i, tmp);
9290             }
9291         }
9292       ASET (attrs, coding_attr_charset_valids, val);
9293       category = coding_category_charset;
9294     }
9295   else if (EQ (coding_type, Qccl))
9296     {
9297       Lisp_Object valids;
9298
9299       if (nargs < coding_arg_ccl_max)
9300         goto short_args;
9301
9302       val = args[coding_arg_ccl_decoder];
9303       CHECK_CCL_PROGRAM (val);
9304       if (VECTORP (val))
9305         val = Fcopy_sequence (val);
9306       ASET (attrs, coding_attr_ccl_decoder, val);
9307
9308       val = args[coding_arg_ccl_encoder];
9309       CHECK_CCL_PROGRAM (val);
9310       if (VECTORP (val))
9311         val = Fcopy_sequence (val);
9312       ASET (attrs, coding_attr_ccl_encoder, val);
9313
9314       val = args[coding_arg_ccl_valids];
9315       valids = Fmake_string (make_number (256), make_number (0));
9316       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9317         {
9318           int from, to;
9319
9320           val = Fcar (tail);
9321           if (INTEGERP (val))
9322             {
9323               from = to = XINT (val);
9324               if (from < 0 || from > 255)
9325                 args_out_of_range_3 (val, make_number (0), make_number (255));
9326             }
9327           else
9328             {
9329               CHECK_CONS (val);
9330               CHECK_NATNUM_CAR (val);
9331               CHECK_NATNUM_CDR (val);
9332               from = XINT (XCAR (val));
9333               if (from > 255)
9334                 args_out_of_range_3 (XCAR (val),
9335                                      make_number (0), make_number (255));
9336               to = XINT (XCDR (val));
9337               if (to < from || to > 255)
9338                 args_out_of_range_3 (XCDR (val),
9339                                      XCAR (val), make_number (255));
9340             }
9341           for (i = from; i <= to; i++)
9342             SSET (valids, i, 1);
9343         }
9344       ASET (attrs, coding_attr_ccl_valids, valids);
9345
9346       category = coding_category_ccl;
9347     }
9348   else if (EQ (coding_type, Qutf_16))
9349     {
9350       Lisp_Object bom, endian;
9351
9352       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9353
9354       if (nargs < coding_arg_utf16_max)
9355         goto short_args;
9356
9357       bom = args[coding_arg_utf16_bom];
9358       if (! NILP (bom) && ! EQ (bom, Qt))
9359         {
9360           CHECK_CONS (bom);
9361           val = XCAR (bom);
9362           CHECK_CODING_SYSTEM (val);
9363           val = XCDR (bom);
9364           CHECK_CODING_SYSTEM (val);
9365         }
9366       ASET (attrs, coding_attr_utf_bom, bom);
9367
9368       endian = args[coding_arg_utf16_endian];
9369       CHECK_SYMBOL (endian);
9370       if (NILP (endian))
9371         endian = Qbig;
9372       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9373         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9374       ASET (attrs, coding_attr_utf_16_endian, endian);
9375
9376       category = (CONSP (bom)
9377                   ? coding_category_utf_16_auto
9378                   : NILP (bom)
9379                   ? (EQ (endian, Qbig)
9380                      ? coding_category_utf_16_be_nosig
9381                      : coding_category_utf_16_le_nosig)
9382                   : (EQ (endian, Qbig)
9383                      ? coding_category_utf_16_be
9384                      : coding_category_utf_16_le));
9385     }
9386   else if (EQ (coding_type, Qiso_2022))
9387     {
9388       Lisp_Object initial, reg_usage, request, flags;
9389       int i;
9390
9391       if (nargs < coding_arg_iso2022_max)
9392         goto short_args;
9393
9394       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9395       CHECK_VECTOR (initial);
9396       for (i = 0; i < 4; i++)
9397         {
9398           val = Faref (initial, make_number (i));
9399           if (! NILP (val))
9400             {
9401               struct charset *charset;
9402
9403               CHECK_CHARSET_GET_CHARSET (val, charset);
9404               ASET (initial, i, make_number (CHARSET_ID (charset)));
9405               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9406                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9407             }
9408           else
9409             ASET (initial, i, make_number (-1));
9410         }
9411
9412       reg_usage = args[coding_arg_iso2022_reg_usage];
9413       CHECK_CONS (reg_usage);
9414       CHECK_NUMBER_CAR (reg_usage);
9415       CHECK_NUMBER_CDR (reg_usage);
9416
9417       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9418       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9419         {
9420           int id;
9421           Lisp_Object tmp;
9422
9423           val = Fcar (tail);
9424           CHECK_CONS (val);
9425           tmp = XCAR (val);
9426           CHECK_CHARSET_GET_ID (tmp, id);
9427           CHECK_NATNUM_CDR (val);
9428           if (XINT (XCDR (val)) >= 4)
9429             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9430           XSETCAR (val, make_number (id));
9431         }
9432
9433       flags = args[coding_arg_iso2022_flags];
9434       CHECK_NATNUM (flags);
9435       i = XINT (flags);
9436       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9437         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9438
9439       ASET (attrs, coding_attr_iso_initial, initial);
9440       ASET (attrs, coding_attr_iso_usage, reg_usage);
9441       ASET (attrs, coding_attr_iso_request, request);
9442       ASET (attrs, coding_attr_iso_flags, flags);
9443       setup_iso_safe_charsets (attrs);
9444
9445       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9446         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9447                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9448                     ? coding_category_iso_7_else
9449                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9450                     ? coding_category_iso_7
9451                     : coding_category_iso_7_tight);
9452       else
9453         {
9454           int id = XINT (AREF (initial, 1));
9455
9456           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9457                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9458                        || id < 0)
9459                       ? coding_category_iso_8_else
9460                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9461                       ? coding_category_iso_8_1
9462                       : coding_category_iso_8_2);
9463         }
9464       if (category != coding_category_iso_8_1
9465           && category != coding_category_iso_8_2)
9466         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9467     }
9468   else if (EQ (coding_type, Qemacs_mule))
9469     {
9470       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9471         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9472       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9473       category = coding_category_emacs_mule;
9474     }
9475   else if (EQ (coding_type, Qshift_jis))
9476     {
9477
9478       struct charset *charset;
9479
9480       if (XINT (Flength (charset_list)) != 3
9481           && XINT (Flength (charset_list)) != 4)
9482         error ("There should be three or four charsets");
9483
9484       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9485       if (CHARSET_DIMENSION (charset) != 1)
9486         error ("Dimension of charset %s is not one",
9487                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9488       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9489         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9490
9491       charset_list = XCDR (charset_list);
9492       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9493       if (CHARSET_DIMENSION (charset) != 1)
9494         error ("Dimension of charset %s is not one",
9495                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9496
9497       charset_list = XCDR (charset_list);
9498       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9499       if (CHARSET_DIMENSION (charset) != 2)
9500         error ("Dimension of charset %s is not two",
9501                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9502
9503       charset_list = XCDR (charset_list);
9504       if (! NILP (charset_list))
9505         {
9506           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9507           if (CHARSET_DIMENSION (charset) != 2)
9508             error ("Dimension of charset %s is not two",
9509                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9510         }
9511
9512       category = coding_category_sjis;
9513       Vsjis_coding_system = name;
9514     }
9515   else if (EQ (coding_type, Qbig5))
9516     {
9517       struct charset *charset;
9518
9519       if (XINT (Flength (charset_list)) != 2)
9520         error ("There should be just two charsets");
9521
9522       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9523       if (CHARSET_DIMENSION (charset) != 1)
9524         error ("Dimension of charset %s is not one",
9525                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9526       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9527         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9528
9529       charset_list = XCDR (charset_list);
9530       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9531       if (CHARSET_DIMENSION (charset) != 2)
9532         error ("Dimension of charset %s is not two",
9533                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9534
9535       category = coding_category_big5;
9536       Vbig5_coding_system = name;
9537     }
9538   else if (EQ (coding_type, Qraw_text))
9539     {
9540       category = coding_category_raw_text;
9541       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9542     }
9543   else if (EQ (coding_type, Qutf_8))
9544     {
9545       Lisp_Object bom;
9546
9547       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9548
9549       if (nargs < coding_arg_utf8_max)
9550         goto short_args;
9551
9552       bom = args[coding_arg_utf8_bom];
9553       if (! NILP (bom) && ! EQ (bom, Qt))
9554         {
9555           CHECK_CONS (bom);
9556           val = XCAR (bom);
9557           CHECK_CODING_SYSTEM (val);
9558           val = XCDR (bom);
9559           CHECK_CODING_SYSTEM (val);
9560         }
9561       ASET (attrs, coding_attr_utf_bom, bom);
9562
9563       category = (CONSP (bom) ? coding_category_utf_8_auto
9564                   : NILP (bom) ? coding_category_utf_8_nosig
9565                   : coding_category_utf_8_sig);
9566     }
9567   else if (EQ (coding_type, Qundecided))
9568     category = coding_category_undecided;
9569   else
9570     error ("Invalid coding system type: %s",
9571            SDATA (SYMBOL_NAME (coding_type)));
9572
9573   CODING_ATTR_CATEGORY (attrs) = make_number (category);
9574   CODING_ATTR_PLIST (attrs)
9575     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9576                                 CODING_ATTR_PLIST (attrs)));
9577   CODING_ATTR_PLIST (attrs)
9578     = Fcons (QCascii_compatible_p,
9579              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9580                     CODING_ATTR_PLIST (attrs)));
9581
9582   eol_type = args[coding_arg_eol_type];
9583   if (! NILP (eol_type)
9584       && ! EQ (eol_type, Qunix)
9585       && ! EQ (eol_type, Qdos)
9586       && ! EQ (eol_type, Qmac))
9587     error ("Invalid eol-type");
9588
9589   aliases = Fcons (name, Qnil);
9590
9591   if (NILP (eol_type))
9592     {
9593       eol_type = make_subsidiaries (name);
9594       for (i = 0; i < 3; i++)
9595         {
9596           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9597
9598           this_name = AREF (eol_type, i);
9599           this_aliases = Fcons (this_name, Qnil);
9600           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9601           this_spec = Fmake_vector (make_number (3), attrs);
9602           ASET (this_spec, 1, this_aliases);
9603           ASET (this_spec, 2, this_eol_type);
9604           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9605           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
9606           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9607           if (NILP (val))
9608             Vcoding_system_alist
9609               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9610                        Vcoding_system_alist);
9611         }
9612     }
9613
9614   spec_vec = Fmake_vector (make_number (3), attrs);
9615   ASET (spec_vec, 1, aliases);
9616   ASET (spec_vec, 2, eol_type);
9617
9618   Fputhash (name, spec_vec, Vcoding_system_hash_table);
9619   Vcoding_system_list = Fcons (name, Vcoding_system_list);
9620   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9621   if (NILP (val))
9622     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9623                                   Vcoding_system_alist);
9624
9625   {
9626     int id = coding_categories[category].id;
9627
9628     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9629       setup_coding_system (name, &coding_categories[category]);
9630   }
9631
9632   return Qnil;
9633
9634  short_args:
9635   return Fsignal (Qwrong_number_of_arguments,
9636                   Fcons (intern ("define-coding-system-internal"),
9637                          make_number (nargs)));
9638 }
9639
9640
9641 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9642        3, 3, 0,
9643        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
9644   (coding_system, prop, val)
9645      Lisp_Object coding_system, prop, val;
9646 {
9647   Lisp_Object spec, attrs;
9648
9649   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9650   attrs = AREF (spec, 0);
9651   if (EQ (prop, QCmnemonic))
9652     {
9653       if (! STRINGP (val))
9654         CHECK_CHARACTER (val);
9655       CODING_ATTR_MNEMONIC (attrs) = val;
9656     }
9657   else if (EQ (prop, QCdefault_char))
9658     {
9659       if (NILP (val))
9660         val = make_number (' ');
9661       else
9662         CHECK_CHARACTER (val);
9663       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9664     }
9665   else if (EQ (prop, QCdecode_translation_table))
9666     {
9667       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9668         CHECK_SYMBOL (val);
9669       CODING_ATTR_DECODE_TBL (attrs) = val;
9670     }
9671   else if (EQ (prop, QCencode_translation_table))
9672     {
9673       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9674         CHECK_SYMBOL (val);
9675       CODING_ATTR_ENCODE_TBL (attrs) = val;
9676     }
9677   else if (EQ (prop, QCpost_read_conversion))
9678     {
9679       CHECK_SYMBOL (val);
9680       CODING_ATTR_POST_READ (attrs) = val;
9681     }
9682   else if (EQ (prop, QCpre_write_conversion))
9683     {
9684       CHECK_SYMBOL (val);
9685       CODING_ATTR_PRE_WRITE (attrs) = val;
9686     }
9687   else if (EQ (prop, QCascii_compatible_p))
9688     {
9689       CODING_ATTR_ASCII_COMPAT (attrs) = val;
9690     }
9691
9692   CODING_ATTR_PLIST (attrs)
9693     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9694   return val;
9695 }
9696
9697
9698 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9699        Sdefine_coding_system_alias, 2, 2, 0,
9700        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
9701      (alias, coding_system)
9702      Lisp_Object alias, coding_system;
9703 {
9704   Lisp_Object spec, aliases, eol_type, val;
9705
9706   CHECK_SYMBOL (alias);
9707   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9708   aliases = AREF (spec, 1);
9709   /* ALIASES should be a list of length more than zero, and the first
9710      element is a base coding system.  Append ALIAS at the tail of the
9711      list.  */
9712   while (!NILP (XCDR (aliases)))
9713     aliases = XCDR (aliases);
9714   XSETCDR (aliases, Fcons (alias, Qnil));
9715
9716   eol_type = AREF (spec, 2);
9717   if (VECTORP (eol_type))
9718     {
9719       Lisp_Object subsidiaries;
9720       int i;
9721
9722       subsidiaries = make_subsidiaries (alias);
9723       for (i = 0; i < 3; i++)
9724         Fdefine_coding_system_alias (AREF (subsidiaries, i),
9725                                      AREF (eol_type, i));
9726     }
9727
9728   Fputhash (alias, spec, Vcoding_system_hash_table);
9729   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
9730   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9731   if (NILP (val))
9732     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9733                                   Vcoding_system_alist);
9734
9735   return Qnil;
9736 }
9737
9738 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9739        1, 1, 0,
9740        doc: /* Return the base of CODING-SYSTEM.
9741 Any alias or subsidiary coding system is not a base coding system.  */)
9742   (coding_system)
9743      Lisp_Object coding_system;
9744 {
9745   Lisp_Object spec, attrs;
9746
9747   if (NILP (coding_system))
9748     return (Qno_conversion);
9749   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9750   attrs = AREF (spec, 0);
9751   return CODING_ATTR_BASE_NAME (attrs);
9752 }
9753
9754 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9755        1, 1, 0,
9756        doc: "Return the property list of CODING-SYSTEM.")
9757      (coding_system)
9758      Lisp_Object coding_system;
9759 {
9760   Lisp_Object spec, attrs;
9761
9762   if (NILP (coding_system))
9763     coding_system = Qno_conversion;
9764   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9765   attrs = AREF (spec, 0);
9766   return CODING_ATTR_PLIST (attrs);
9767 }
9768
9769
9770 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9771        1, 1, 0,
9772        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
9773      (coding_system)
9774      Lisp_Object coding_system;
9775 {
9776   Lisp_Object spec;
9777
9778   if (NILP (coding_system))
9779     coding_system = Qno_conversion;
9780   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9781   return AREF (spec, 1);
9782 }
9783
9784 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9785        Scoding_system_eol_type, 1, 1, 0,
9786        doc: /* Return eol-type of CODING-SYSTEM.
9787 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
9788
9789 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9790 and CR respectively.
9791
9792 A vector value indicates that a format of end-of-line should be
9793 detected automatically.  Nth element of the vector is the subsidiary
9794 coding system whose eol-type is N.  */)
9795      (coding_system)
9796      Lisp_Object coding_system;
9797 {
9798   Lisp_Object spec, eol_type;
9799   int n;
9800
9801   if (NILP (coding_system))
9802     coding_system = Qno_conversion;
9803   if (! CODING_SYSTEM_P (coding_system))
9804     return Qnil;
9805   spec = CODING_SYSTEM_SPEC (coding_system);
9806   eol_type = AREF (spec, 2);
9807   if (VECTORP (eol_type))
9808     return Fcopy_sequence (eol_type);
9809   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9810   return make_number (n);
9811 }
9812
9813 #endif /* emacs */
9814
9815 \f
9816 /*** 9. Post-amble ***/
9817
9818 void
9819 init_coding_once ()
9820 {
9821   int i;
9822
9823   for (i = 0; i < coding_category_max; i++)
9824     {
9825       coding_categories[i].id = -1;
9826       coding_priorities[i] = i;
9827     }
9828
9829   /* ISO2022 specific initialize routine.  */
9830   for (i = 0; i < 0x20; i++)
9831     iso_code_class[i] = ISO_control_0;
9832   for (i = 0x21; i < 0x7F; i++)
9833     iso_code_class[i] = ISO_graphic_plane_0;
9834   for (i = 0x80; i < 0xA0; i++)
9835     iso_code_class[i] = ISO_control_1;
9836   for (i = 0xA1; i < 0xFF; i++)
9837     iso_code_class[i] = ISO_graphic_plane_1;
9838   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9839   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9840   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9841   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9842   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9843   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9844   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9845   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9846   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9847
9848   for (i = 0; i < 256; i++)
9849     {
9850       emacs_mule_bytes[i] = 1;
9851     }
9852   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9853   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9854   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9855   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9856 }
9857
9858 #ifdef emacs
9859
9860 void
9861 syms_of_coding ()
9862 {
9863   staticpro (&Vcoding_system_hash_table);
9864   {
9865     Lisp_Object args[2];
9866     args[0] = QCtest;
9867     args[1] = Qeq;
9868     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9869   }
9870
9871   staticpro (&Vsjis_coding_system);
9872   Vsjis_coding_system = Qnil;
9873
9874   staticpro (&Vbig5_coding_system);
9875   Vbig5_coding_system = Qnil;
9876
9877   staticpro (&Vcode_conversion_reused_workbuf);
9878   Vcode_conversion_reused_workbuf = Qnil;
9879
9880   staticpro (&Vcode_conversion_workbuf_name);
9881   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9882
9883   reused_workbuf_in_use = 0;
9884
9885   DEFSYM (Qcharset, "charset");
9886   DEFSYM (Qtarget_idx, "target-idx");
9887   DEFSYM (Qcoding_system_history, "coding-system-history");
9888   Fset (Qcoding_system_history, Qnil);
9889
9890   /* Target FILENAME is the first argument.  */
9891   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9892   /* Target FILENAME is the third argument.  */
9893   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9894
9895   DEFSYM (Qcall_process, "call-process");
9896   /* Target PROGRAM is the first argument.  */
9897   Fput (Qcall_process, Qtarget_idx, make_number (0));
9898
9899   DEFSYM (Qcall_process_region, "call-process-region");
9900   /* Target PROGRAM is the third argument.  */
9901   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9902
9903   DEFSYM (Qstart_process, "start-process");
9904   /* Target PROGRAM is the third argument.  */
9905   Fput (Qstart_process, Qtarget_idx, make_number (2));
9906
9907   DEFSYM (Qopen_network_stream, "open-network-stream");
9908   /* Target SERVICE is the fourth argument.  */
9909   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9910
9911   DEFSYM (Qcoding_system, "coding-system");
9912   DEFSYM (Qcoding_aliases, "coding-aliases");
9913
9914   DEFSYM (Qeol_type, "eol-type");
9915   DEFSYM (Qunix, "unix");
9916   DEFSYM (Qdos, "dos");
9917
9918   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9919   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9920   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9921   DEFSYM (Qdefault_char, "default-char");
9922   DEFSYM (Qundecided, "undecided");
9923   DEFSYM (Qno_conversion, "no-conversion");
9924   DEFSYM (Qraw_text, "raw-text");
9925
9926   DEFSYM (Qiso_2022, "iso-2022");
9927
9928   DEFSYM (Qutf_8, "utf-8");
9929   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9930
9931   DEFSYM (Qutf_16, "utf-16");
9932   DEFSYM (Qbig, "big");
9933   DEFSYM (Qlittle, "little");
9934
9935   DEFSYM (Qshift_jis, "shift-jis");
9936   DEFSYM (Qbig5, "big5");
9937
9938   DEFSYM (Qcoding_system_p, "coding-system-p");
9939
9940   DEFSYM (Qcoding_system_error, "coding-system-error");
9941   Fput (Qcoding_system_error, Qerror_conditions,
9942         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9943   Fput (Qcoding_system_error, Qerror_message,
9944         build_string ("Invalid coding system"));
9945
9946   /* Intern this now in case it isn't already done.
9947      Setting this variable twice is harmless.
9948      But don't staticpro it here--that is done in alloc.c.  */
9949   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9950
9951   DEFSYM (Qtranslation_table, "translation-table");
9952   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9953   DEFSYM (Qtranslation_table_id, "translation-table-id");
9954   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9955   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9956
9957   DEFSYM (Qvalid_codes, "valid-codes");
9958
9959   DEFSYM (Qemacs_mule, "emacs-mule");
9960
9961   DEFSYM (QCcategory, ":category");
9962   DEFSYM (QCmnemonic, ":mnemonic");
9963   DEFSYM (QCdefault_char, ":default-char");
9964   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9965   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9966   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9967   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9968   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
9969
9970   Vcoding_category_table
9971     = Fmake_vector (make_number (coding_category_max), Qnil);
9972   staticpro (&Vcoding_category_table);
9973   /* Followings are target of code detection.  */
9974   ASET (Vcoding_category_table, coding_category_iso_7,
9975         intern ("coding-category-iso-7"));
9976   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9977         intern ("coding-category-iso-7-tight"));
9978   ASET (Vcoding_category_table, coding_category_iso_8_1,
9979         intern ("coding-category-iso-8-1"));
9980   ASET (Vcoding_category_table, coding_category_iso_8_2,
9981         intern ("coding-category-iso-8-2"));
9982   ASET (Vcoding_category_table, coding_category_iso_7_else,
9983         intern ("coding-category-iso-7-else"));
9984   ASET (Vcoding_category_table, coding_category_iso_8_else,
9985         intern ("coding-category-iso-8-else"));
9986   ASET (Vcoding_category_table, coding_category_utf_8_auto,
9987         intern ("coding-category-utf-8-auto"));
9988   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
9989         intern ("coding-category-utf-8"));
9990   ASET (Vcoding_category_table, coding_category_utf_8_sig,
9991         intern ("coding-category-utf-8-sig"));
9992   ASET (Vcoding_category_table, coding_category_utf_16_be,
9993         intern ("coding-category-utf-16-be"));
9994   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9995         intern ("coding-category-utf-16-auto"));
9996   ASET (Vcoding_category_table, coding_category_utf_16_le,
9997         intern ("coding-category-utf-16-le"));
9998   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9999         intern ("coding-category-utf-16-be-nosig"));
10000   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10001         intern ("coding-category-utf-16-le-nosig"));
10002   ASET (Vcoding_category_table, coding_category_charset,
10003         intern ("coding-category-charset"));
10004   ASET (Vcoding_category_table, coding_category_sjis,
10005         intern ("coding-category-sjis"));
10006   ASET (Vcoding_category_table, coding_category_big5,
10007         intern ("coding-category-big5"));
10008   ASET (Vcoding_category_table, coding_category_ccl,
10009         intern ("coding-category-ccl"));
10010   ASET (Vcoding_category_table, coding_category_emacs_mule,
10011         intern ("coding-category-emacs-mule"));
10012   /* Followings are NOT target of code detection.  */
10013   ASET (Vcoding_category_table, coding_category_raw_text,
10014         intern ("coding-category-raw-text"));
10015   ASET (Vcoding_category_table, coding_category_undecided,
10016         intern ("coding-category-undecided"));
10017
10018   DEFSYM (Qinsufficient_source, "insufficient-source");
10019   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10020   DEFSYM (Qinvalid_source, "invalid-source");
10021   DEFSYM (Qinterrupted, "interrupted");
10022   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10023   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10024
10025   defsubr (&Scoding_system_p);
10026   defsubr (&Sread_coding_system);
10027   defsubr (&Sread_non_nil_coding_system);
10028   defsubr (&Scheck_coding_system);
10029   defsubr (&Sdetect_coding_region);
10030   defsubr (&Sdetect_coding_string);
10031   defsubr (&Sfind_coding_systems_region_internal);
10032   defsubr (&Sunencodable_char_position);
10033   defsubr (&Scheck_coding_systems_region);
10034   defsubr (&Sdecode_coding_region);
10035   defsubr (&Sencode_coding_region);
10036   defsubr (&Sdecode_coding_string);
10037   defsubr (&Sencode_coding_string);
10038   defsubr (&Sdecode_sjis_char);
10039   defsubr (&Sencode_sjis_char);
10040   defsubr (&Sdecode_big5_char);
10041   defsubr (&Sencode_big5_char);
10042   defsubr (&Sset_terminal_coding_system_internal);
10043   defsubr (&Sset_safe_terminal_coding_system_internal);
10044   defsubr (&Sterminal_coding_system);
10045   defsubr (&Sset_keyboard_coding_system_internal);
10046   defsubr (&Skeyboard_coding_system);
10047   defsubr (&Sfind_operation_coding_system);
10048   defsubr (&Sset_coding_system_priority);
10049   defsubr (&Sdefine_coding_system_internal);
10050   defsubr (&Sdefine_coding_system_alias);
10051   defsubr (&Scoding_system_put);
10052   defsubr (&Scoding_system_base);
10053   defsubr (&Scoding_system_plist);
10054   defsubr (&Scoding_system_aliases);
10055   defsubr (&Scoding_system_eol_type);
10056   defsubr (&Scoding_system_priority_list);
10057
10058   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10059                doc: /* List of coding systems.
10060
10061 Do not alter the value of this variable manually.  This variable should be
10062 updated by the functions `define-coding-system' and
10063 `define-coding-system-alias'.  */);
10064   Vcoding_system_list = Qnil;
10065
10066   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10067                doc: /* Alist of coding system names.
10068 Each element is one element list of coding system name.
10069 This variable is given to `completing-read' as COLLECTION argument.
10070
10071 Do not alter the value of this variable manually.  This variable should be
10072 updated by the functions `make-coding-system' and
10073 `define-coding-system-alias'.  */);
10074   Vcoding_system_alist = Qnil;
10075
10076   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10077                doc: /* List of coding-categories (symbols) ordered by priority.
10078
10079 On detecting a coding system, Emacs tries code detection algorithms
10080 associated with each coding-category one by one in this order.  When
10081 one algorithm agrees with a byte sequence of source text, the coding
10082 system bound to the corresponding coding-category is selected.
10083
10084 Don't modify this variable directly, but use `set-coding-priority'.  */);
10085   {
10086     int i;
10087
10088     Vcoding_category_list = Qnil;
10089     for (i = coding_category_max - 1; i >= 0; i--)
10090       Vcoding_category_list
10091         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10092                  Vcoding_category_list);
10093   }
10094
10095   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10096                doc: /* Specify the coding system for read operations.
10097 It is useful to bind this variable with `let', but do not set it globally.
10098 If the value is a coding system, it is used for decoding on read operation.
10099 If not, an appropriate element is used from one of the coding system alists.
10100 There are three such tables: `file-coding-system-alist',
10101 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10102   Vcoding_system_for_read = Qnil;
10103
10104   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10105                doc: /* Specify the coding system for write operations.
10106 Programs bind this variable with `let', but you should not set it globally.
10107 If the value is a coding system, it is used for encoding of output,
10108 when writing it to a file and when sending it to a file or subprocess.
10109
10110 If this does not specify a coding system, an appropriate element
10111 is used from one of the coding system alists.
10112 There are three such tables: `file-coding-system-alist',
10113 `process-coding-system-alist', and `network-coding-system-alist'.
10114 For output to files, if the above procedure does not specify a coding system,
10115 the value of `buffer-file-coding-system' is used.  */);
10116   Vcoding_system_for_write = Qnil;
10117
10118   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10119                doc: /*
10120 Coding system used in the latest file or process I/O.  */);
10121   Vlast_coding_system_used = Qnil;
10122
10123   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10124                doc: /*
10125 Error status of the last code conversion.
10126
10127 When an error was detected in the last code conversion, this variable
10128 is set to one of the following symbols.
10129   `insufficient-source'
10130   `inconsistent-eol'
10131   `invalid-source'
10132   `interrupted'
10133   `insufficient-memory'
10134 When no error was detected, the value doesn't change.  So, to check
10135 the error status of a code conversion by this variable, you must
10136 explicitly set this variable to nil before performing code
10137 conversion.  */);
10138   Vlast_code_conversion_error = Qnil;
10139
10140   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10141                doc: /*
10142 *Non-nil means always inhibit code conversion of end-of-line format.
10143 See info node `Coding Systems' and info node `Text and Binary' concerning
10144 such conversion.  */);
10145   inhibit_eol_conversion = 0;
10146
10147   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10148                doc: /*
10149 Non-nil means process buffer inherits coding system of process output.
10150 Bind it to t if the process output is to be treated as if it were a file
10151 read from some filesystem.  */);
10152   inherit_process_coding_system = 0;
10153
10154   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10155                doc: /*
10156 Alist to decide a coding system to use for a file I/O operation.
10157 The format is ((PATTERN . VAL) ...),
10158 where PATTERN is a regular expression matching a file name,
10159 VAL is a coding system, a cons of coding systems, or a function symbol.
10160 If VAL is a coding system, it is used for both decoding and encoding
10161 the file contents.
10162 If VAL is a cons of coding systems, the car part is used for decoding,
10163 and the cdr part is used for encoding.
10164 If VAL is a function symbol, the function must return a coding system
10165 or a cons of coding systems which are used as above.  The function is
10166 called with an argument that is a list of the arguments with which
10167 `find-operation-coding-system' was called.  If the function can't decide
10168 a coding system, it can return `undecided' so that the normal
10169 code-detection is performed.
10170
10171 See also the function `find-operation-coding-system'
10172 and the variable `auto-coding-alist'.  */);
10173   Vfile_coding_system_alist = Qnil;
10174
10175   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10176                doc: /*
10177 Alist to decide a coding system to use for a process I/O operation.
10178 The format is ((PATTERN . VAL) ...),
10179 where PATTERN is a regular expression matching a program name,
10180 VAL is a coding system, a cons of coding systems, or a function symbol.
10181 If VAL is a coding system, it is used for both decoding what received
10182 from the program and encoding what sent to the program.
10183 If VAL is a cons of coding systems, the car part is used for decoding,
10184 and the cdr part is used for encoding.
10185 If VAL is a function symbol, the function must return a coding system
10186 or a cons of coding systems which are used as above.
10187
10188 See also the function `find-operation-coding-system'.  */);
10189   Vprocess_coding_system_alist = Qnil;
10190
10191   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10192                doc: /*
10193 Alist to decide a coding system to use for a network I/O operation.
10194 The format is ((PATTERN . VAL) ...),
10195 where PATTERN is a regular expression matching a network service name
10196 or is a port number to connect to,
10197 VAL is a coding system, a cons of coding systems, or a function symbol.
10198 If VAL is a coding system, it is used for both decoding what received
10199 from the network stream and encoding what sent to the network stream.
10200 If VAL is a cons of coding systems, the car part is used for decoding,
10201 and the cdr part is used for encoding.
10202 If VAL is a function symbol, the function must return a coding system
10203 or a cons of coding systems which are used as above.
10204
10205 See also the function `find-operation-coding-system'.  */);
10206   Vnetwork_coding_system_alist = Qnil;
10207
10208   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10209                doc: /* Coding system to use with system messages.
10210 Also used for decoding keyboard input on X Window system.  */);
10211   Vlocale_coding_system = Qnil;
10212
10213   /* The eol mnemonics are reset in startup.el system-dependently.  */
10214   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10215                doc: /*
10216 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10217   eol_mnemonic_unix = build_string (":");
10218
10219   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10220                doc: /*
10221 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10222   eol_mnemonic_dos = build_string ("\\");
10223
10224   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10225                doc: /*
10226 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10227   eol_mnemonic_mac = build_string ("/");
10228
10229   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10230                doc: /*
10231 *String displayed in mode line when end-of-line format is not yet determined.  */);
10232   eol_mnemonic_undecided = build_string (":");
10233
10234   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10235                doc: /*
10236 *Non-nil enables character translation while encoding and decoding.  */);
10237   Venable_character_translation = Qt;
10238
10239   DEFVAR_LISP ("standard-translation-table-for-decode",
10240                &Vstandard_translation_table_for_decode,
10241                doc: /* Table for translating characters while decoding.  */);
10242   Vstandard_translation_table_for_decode = Qnil;
10243
10244   DEFVAR_LISP ("standard-translation-table-for-encode",
10245                &Vstandard_translation_table_for_encode,
10246                doc: /* Table for translating characters while encoding.  */);
10247   Vstandard_translation_table_for_encode = Qnil;
10248
10249   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10250                doc: /* Alist of charsets vs revision numbers.
10251 While encoding, if a charset (car part of an element) is found,
10252 designate it with the escape sequence identifying revision (cdr part
10253 of the element).  */);
10254   Vcharset_revision_table = Qnil;
10255
10256   DEFVAR_LISP ("default-process-coding-system",
10257                &Vdefault_process_coding_system,
10258                doc: /* Cons of coding systems used for process I/O by default.
10259 The car part is used for decoding a process output,
10260 the cdr part is used for encoding a text to be sent to a process.  */);
10261   Vdefault_process_coding_system = Qnil;
10262
10263   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10264                doc: /*
10265 Table of extra Latin codes in the range 128..159 (inclusive).
10266 This is a vector of length 256.
10267 If Nth element is non-nil, the existence of code N in a file
10268 \(or output of subprocess) doesn't prevent it to be detected as
10269 a coding system of ISO 2022 variant which has a flag
10270 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10271 or reading output of a subprocess.
10272 Only 128th through 159th elements have a meaning.  */);
10273   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10274
10275   DEFVAR_LISP ("select-safe-coding-system-function",
10276                &Vselect_safe_coding_system_function,
10277                doc: /*
10278 Function to call to select safe coding system for encoding a text.
10279
10280 If set, this function is called to force a user to select a proper
10281 coding system which can encode the text in the case that a default
10282 coding system used in each operation can't encode the text.  The
10283 function should take care that the buffer is not modified while
10284 the coding system is being selected.
10285
10286 The default value is `select-safe-coding-system' (which see).  */);
10287   Vselect_safe_coding_system_function = Qnil;
10288
10289   DEFVAR_BOOL ("coding-system-require-warning",
10290                &coding_system_require_warning,
10291                doc: /* Internal use only.
10292 If non-nil, on writing a file, `select-safe-coding-system-function' is
10293 called even if `coding-system-for-write' is non-nil.  The command
10294 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10295   coding_system_require_warning = 0;
10296
10297
10298   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10299                &inhibit_iso_escape_detection,
10300                doc: /*
10301 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10302
10303 When Emacs reads text, it tries to detect how the text is encoded.
10304 This code detection is sensitive to escape sequences.  If Emacs sees
10305 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10306 of the ISO2022 encodings, and decodes text by the corresponding coding
10307 system (e.g. `iso-2022-7bit').
10308
10309 However, there may be a case that you want to read escape sequences in
10310 a file as is.  In such a case, you can set this variable to non-nil.
10311 Then the code detection will ignore any escape sequences, and no text is
10312 detected as encoded in some ISO-2022 encoding.  The result is that all
10313 escape sequences become visible in a buffer.
10314
10315 The default value is nil, and it is strongly recommended not to change
10316 it.  That is because many Emacs Lisp source files that contain
10317 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10318 in Emacs's distribution, and they won't be decoded correctly on
10319 reading if you suppress escape sequence detection.
10320
10321 The other way to read escape sequences in a file without decoding is
10322 to explicitly specify some coding system that doesn't use ISO-2022
10323 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10324   inhibit_iso_escape_detection = 0;
10325
10326   DEFVAR_BOOL ("inhibit-null-byte-detection",
10327                &inhibit_null_byte_detection,
10328                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10329 By default, Emacs treats it as binary data, and does not attempt to
10330 decode it.  The effect is as if you specified `no-conversion' for
10331 reading that text.
10332
10333 Set this to non-nil when a regular text happens to include null bytes.
10334 Examples are Index nodes of Info files and null-byte delimited output
10335 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10336 decode text as usual.  */);
10337   inhibit_null_byte_detection = 0;
10338
10339   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10340                doc: /* Char table for translating self-inserting characters.
10341 This is applied to the result of input methods, not their input.
10342 See also `keyboard-translate-table'.  */);
10343     Vtranslation_table_for_input = Qnil;
10344
10345   {
10346     Lisp_Object args[coding_arg_max];
10347     Lisp_Object plist[16];
10348     int i;
10349
10350     for (i = 0; i < coding_arg_max; i++)
10351       args[i] = Qnil;
10352
10353     plist[0] = intern (":name");
10354     plist[1] = args[coding_arg_name] = Qno_conversion;
10355     plist[2] = intern (":mnemonic");
10356     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10357     plist[4] = intern (":coding-type");
10358     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10359     plist[6] = intern (":ascii-compatible-p");
10360     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10361     plist[8] = intern (":default-char");
10362     plist[9] = args[coding_arg_default_char] = make_number (0);
10363     plist[10] = intern (":for-unibyte");
10364     plist[11] = args[coding_arg_for_unibyte] = Qt;
10365     plist[12] = intern (":docstring");
10366     plist[13] = build_string ("Do no conversion.\n\
10367 \n\
10368 When you visit a file with this coding, the file is read into a\n\
10369 unibyte buffer as is, thus each byte of a file is treated as a\n\
10370 character.");
10371     plist[14] = intern (":eol-type");
10372     plist[15] = args[coding_arg_eol_type] = Qunix;
10373     args[coding_arg_plist] = Flist (16, plist);
10374     Fdefine_coding_system_internal (coding_arg_max, args);
10375
10376     plist[1] = args[coding_arg_name] = Qundecided;
10377     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10378     plist[5] = args[coding_arg_coding_type] = Qundecided;
10379     /* This is already set.
10380        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10381     plist[8] = intern (":charset-list");
10382     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10383     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10384     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10385     plist[15] = args[coding_arg_eol_type] = Qnil;
10386     args[coding_arg_plist] = Flist (16, plist);
10387     Fdefine_coding_system_internal (coding_arg_max, args);
10388   }
10389
10390   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10391
10392   {
10393     int i;
10394
10395     for (i = 0; i < coding_category_max; i++)
10396       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10397   }
10398 #if defined (MSDOS) || defined (WINDOWSNT)
10399   system_eol_type = Qdos;
10400 #else
10401   system_eol_type = Qunix;
10402 #endif
10403   staticpro (&system_eol_type);
10404 }
10405
10406 char *
10407 emacs_strerror (error_number)
10408      int error_number;
10409 {
10410   char *str;
10411
10412   synchronize_system_messages_locale ();
10413   str = strerror (error_number);
10414
10415   if (! NILP (Vlocale_coding_system))
10416     {
10417       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10418                                                       Vlocale_coding_system,
10419                                                       0);
10420       str = (char *) SDATA (dec);
10421     }
10422
10423   return str;
10424 }
10425
10426 #endif /* emacs */
10427
10428 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10429    (do not change this comment) */