src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009, 2010
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software: you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation, either version 3 of the License, or
  17 (at your option) any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  26
  27 /*** TABLE OF CONTENTS ***
  28
  29   0. General comments
  30   1. Preamble
  31   2. Emacs' internal format (emacs-utf-8) handlers
  32   3. UTF-8 handlers
  33   4. UTF-16 handlers
  34   5. Charset-base coding systems handlers
  35   6. emacs-mule (old Emacs' internal format) handlers
  36   7. ISO2022 handlers
  37   8. Shift-JIS and BIG5 handlers
  38   9. CCL handlers
  39   10. C library functions
  40   11. Emacs Lisp library functions
  41   12. Postamble
  42
  43 */
  44
  45 /*** 0. General comments ***
  46
  47
  48 CODING SYSTEM
  49
  50   A coding system is an object for an encoding mechanism that contains
  51   information about how to convert byte sequences to character
  52   sequences and vice versa.  When we say "decode", it means converting
  53   a byte sequence of a specific coding system into a character
  54   sequence that is represented by Emacs' internal coding system
  55   `emacs-utf-8', and when we say "encode", it means converting a
  56   character sequence of emacs-utf-8 to a byte sequence of a specific
  57   coding system.
  58
  59   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  60   C level, a coding system is represented by a vector of attributes
  61   stored in the hash table Vcoding_system_hash_table.  The conversion
  62   from coding system symbol to attributes vector is done by looking up
  63   Vcoding_system_hash_table by the symbol.
  64
  65   Coding systems are classified into the following types depending on
  66   the encoding mechanism.  Here's a brief description of the types.
  67
  68   o UTF-8
  69
  70   o UTF-16
  71
  72   o Charset-base coding system
  73
  74   A coding system defined by one or more (coded) character sets.
  75   Decoding and encoding are done by a code converter defined for each
  76   character set.
  77
  78   o Old Emacs internal format (emacs-mule)
  79
  80   The coding system adopted by old versions of Emacs (20 and 21).
  81
  82   o ISO2022-base coding system
  83
  84   The most famous coding system for multiple character sets.  X's
  85   Compound Text, various EUCs (Extended Unix Code), and coding systems
  86   used in the Internet communication such as ISO-2022-JP are all
  87   variants of ISO2022.
  88
  89   o SJIS (or Shift-JIS or MS-Kanji-Code)
  90
  91   A coding system to encode character sets: ASCII, JISX0201, and
  92   JISX0208.  Widely used for PC's in Japan.  Details are described in
  93   section 8.
  94
  95   o BIG5
  96
  97   A coding system to encode character sets: ASCII and Big5.  Widely
  98   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  99   described in section 8.  In this file, when we write "big5" (all
 100   lowercase), we mean the coding system, and when we write "Big5"
 101   (capitalized), we mean the character set.
 102
 103   o CCL
 104
 105   If a user wants to decode/encode text encoded in a coding system
 106   not listed above, he can supply a decoder and an encoder for it in
 107   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 108   program while decoding/encoding.
 109
 110   o Raw-text
 111
 112   A coding system for text containing raw eight-bit data.  Emacs
 113   treats each byte of source text as a character (except for
 114   end-of-line conversion).
 115
 116   o No-conversion
 117
 118   Like raw text, but don't do end-of-line conversion.
 119
 120
 121 END-OF-LINE FORMAT
 122
 123   How text end-of-line is encoded depends on operating system.  For
 124   instance, Unix's format is just one byte of LF (line-feed) code,
 125   whereas DOS's format is two-byte sequence of `carriage-return' and
 126   `line-feed' codes.  MacOS's format is usually one byte of
 127   `carriage-return'.
 128
 129   Since text character encoding and end-of-line encoding are
 130   independent, any coding system described above can take any format
 131   of end-of-line (except for no-conversion).
 132
 133 STRUCT CODING_SYSTEM
 134
 135   Before using a coding system for code conversion (i.e. decoding and
 136   encoding), we set up a structure of type `struct coding_system'.
 137   This structure keeps various information about a specific code
 138   conversion (e.g. the location of source and destination data).
 139
 140 */
 141
 142 /* COMMON MACROS */
 143
 144
 145 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 146
 147   These functions check if a byte sequence specified as a source in
 148   CODING conforms to the format of XXX, and update the members of
 149   DETECT_INFO.
 150
 151   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 152
 153   Below is the template of these functions.  */
 154
 155 #if 0
 156 static int
 157 detect_coding_XXX (struct coding_system *coding,
 158                    struct coding_detection_info *detect_info)
 159 {
 160   const unsigned char *src = coding->source;
 161   const unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the source is exhausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172       /* A non-conforming byte rejects XXX; a strongly suggestive one marks it found.  */
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (__C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source was exhausted without meeting an invalid byte.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size.
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (struct coding_system *coding)
 206 {
 207   const unsigned char *src = coding->source + coding->consumed;
 208   const unsigned char *src_end = coding->source + coding->src_bytes;
 209   /* SRC_BASE remembers the start position in source in each loop.
 210      The loop will be exited when there's not enough source code, or
 211      when there's no room in CHARBUF for a decoded character.  */
 212   const unsigned char *src_base;
 213   /* A buffer to produce decoded characters.  */
 214   int *charbuf = coding->charbuf + coding->charbuf_used;
 215   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 216   int multibytep = coding->src_multibyte;
 217
 218   while (1)
 219     {
 220       src_base = src;
 221       if (charbuf >= charbuf_end)
 222         /* No more room to produce a decoded character.  */
 223         break;
 224       ONE_MORE_BYTE (c);
 225       /* Decode it. */
 226     }
 227
 228  no_more_source:
 229   if (src_base < src_end
 230       && coding->mode & CODING_MODE_LAST_BLOCK)
 231     /* If the source ends with a partial byte sequence of a character
 232        in the last block, copy those bytes as eight-bit raw data.  */
 233     while (src_base < src_end && charbuf < charbuf_end)
 234       *charbuf++ = *src_base++;
 235   /* Remember how many bytes and characters we consumed.  If the
 236      source is multibyte, the bytes and chars are not identical.  */
 237   coding->consumed = coding->consumed_char = src_base - coding->source;
 238   /* Remember how many characters we produced.  */
 239   coding->charbuf_used = charbuf - coding->charbuf;
 240 }
 241 #endif
 242
 243 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 244
 245   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 246   internal multibyte format by CODING.  The resulting byte sequence
 247   goes to a place pointed to by DESTINATION, the length of which
 248   should not exceed DST_BYTES.
 249
 250   These functions set the information of original and encoded texts in
 251   the members produced, produced_char, consumed, and consumed_char of
 252   the structure *CODING.  They also set the member result to one of
 253   CODING_RESULT_XXX indicating how the encoding finished.
 254
 255   DST_BYTES zero means that source area and destination area are
 256   overlapped, which means that we can produce an encoded text until it
 257   reaches the head of not-yet-encoded source text.
 258
 259   Below is a template of these functions.  */
 260 #if 0
 261 static void
 262 encode_coding_XXX (struct coding_system *coding)
 263 {
 264   int multibytep = coding->dst_multibyte;
 265   int *charbuf = coding->charbuf;
 266   int *charbuf_end = coding->charbuf + coding->charbuf_used;
 267   unsigned char *dst = coding->destination + coding->produced;
 268   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 269   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 270   int produced_chars = 0;
 271   /* Stop at ADJUSTED_DST_END so that one iteration cannot write past DST_END.  */
 272   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 273     {
 274       int c = *charbuf;
 275       /* Encode C into DST, and increment DST.  */
 276     }
 277  label_no_more_destination:
 278   /* How many chars and bytes we produced.  */
 279   coding->produced_char += produced_chars;
 280   coding->produced = dst - coding->destination;
 281 }
 282 #endif
 283
 284 \f
 285 /*** 1. Preamble ***/
 286
 287 #include <config.h>
 288 #include <stdio.h>
 289 #include <setjmp.h>
 290
 291 #include "lisp.h"
 292 #include "buffer.h"
 293 #include "character.h"
 294 #include "charset.h"
 295 #include "ccl.h"
 296 #include "composite.h"
 297 #include "coding.h"
 298 #include "window.h"
 299 #include "frame.h"
 300 #include "termhooks.h"
 301
 302 Lisp_Object Vcoding_system_hash_table;
 303
 304 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 305 Lisp_Object Qunix, Qdos;
 306 extern Lisp_Object Qmac;        /* frame.c */
 307 Lisp_Object Qbuffer_file_coding_system;
 308 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 309 Lisp_Object Qdefault_char;
 310 Lisp_Object Qno_conversion, Qundecided;
 311 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 312 Lisp_Object Qbig, Qlittle;
 313 Lisp_Object Qcoding_system_history;
 314 Lisp_Object Qvalid_codes;
 315 Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 316 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 317 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 318 Lisp_Object QCascii_compatible_p;
 319
 320 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 321 Lisp_Object Qcall_process, Qcall_process_region;
 322 Lisp_Object Qstart_process, Qopen_network_stream;
 323 Lisp_Object Qtarget_idx;
 324
 325 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 326 Lisp_Object Qinterrupted, Qinsufficient_memory;
 327
 328 extern Lisp_Object Qcompletion_ignore_case;
 329
 330 /* If a symbol has this property, evaluate the value to define the
 331    symbol as a coding system.  */
 332 static Lisp_Object Qcoding_system_define_form;
 333
 334 int coding_system_require_warning;
 335
 336 Lisp_Object Vselect_safe_coding_system_function;
 337
 338 /* Mnemonic string for each format of end-of-line.  */
 339 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 340 /* Mnemonic string to indicate format of end-of-line is not yet
 341    decided.  */
 342 Lisp_Object eol_mnemonic_undecided;
 343
 344 /* Format of end-of-line decided by system.  This is Qunix on
 345    Unix and Mac, Qdos on DOS/Windows.
 346    This has an effect only for external encoding (i.e. for output to
 347    file and process), not for in-buffer or Lisp string encoding.  */
 348 static Lisp_Object system_eol_type;
 349
 350 #ifdef emacs
 351
 352 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 353
 354 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 355
 356 /* Coding system emacs-mule and raw-text are for converting only
 357    end-of-line format.  */
 358 Lisp_Object Qemacs_mule, Qraw_text;
 359 Lisp_Object Qutf_8_emacs;
 360
 361 /* Coding-systems are handed between Emacs Lisp programs and C internal
 362    routines by the following three variables.  */
 363 /* Coding-system for reading files and receiving data from process.  */
 364 Lisp_Object Vcoding_system_for_read;
 365 /* Coding-system for writing files and sending data to process.  */
 366 Lisp_Object Vcoding_system_for_write;
 367 /* Coding-system actually used in the latest I/O.  */
 368 Lisp_Object Vlast_coding_system_used;
 369 /* Set to non-nil when an error is detected during code conversion.  */
 370 Lisp_Object Vlast_code_conversion_error;
 371 /* A vector of length 256 which contains information about special
 372    Latin codes (especially for dealing with Microsoft codes).  */
 373 Lisp_Object Vlatin_extra_code_table;
 374
 375 /* Flag to inhibit code conversion of end-of-line format.  */
 376 int inhibit_eol_conversion;
 377
 378 /* Flag to inhibit ISO2022 escape sequence detection.  */
 379 int inhibit_iso_escape_detection;
 380
 381 /* Flag to inhibit detection of binary files through null bytes.  */
 382 int inhibit_null_byte_detection;
 383
 384 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 385 int inherit_process_coding_system;
 386
 387 /* Coding system to be used to encode text for terminal display when
 388    terminal coding system is nil.  */
 389 struct coding_system safe_terminal_coding;
 390
 391 Lisp_Object Vfile_coding_system_alist;
 392 Lisp_Object Vprocess_coding_system_alist;
 393 Lisp_Object Vnetwork_coding_system_alist;
 394
 395 Lisp_Object Vlocale_coding_system;
 396
 397 #endif /* emacs */
 398
 399 /* Flag to tell if we look up translation table on character code
 400    conversion.  */
 401 Lisp_Object Venable_character_translation;
 402 /* Standard translation table to look up on decoding (reading).  */
 403 Lisp_Object Vstandard_translation_table_for_decode;
 404 /* Standard translation table to look up on encoding (writing).  */
 405 Lisp_Object Vstandard_translation_table_for_encode;
 406
 407 Lisp_Object Qtranslation_table;
 408 Lisp_Object Qtranslation_table_id;
 409 Lisp_Object Qtranslation_table_for_decode;
 410 Lisp_Object Qtranslation_table_for_encode;
 411
 412 /* Alist of charsets vs revision number.  */
 413 static Lisp_Object Vcharset_revision_table;
 414
 415 /* Default coding systems used for process I/O.  */
 416 Lisp_Object Vdefault_process_coding_system;
 417
 418 /* Char table for translating Quail and self-inserting input.  */
 419 Lisp_Object Vtranslation_table_for_input;
 420
 421 /* Two special coding systems.  */
 422 Lisp_Object Vsjis_coding_system;
 423 Lisp_Object Vbig5_coding_system;
 424
 425 /* ISO2022 section */
 426 /* Integer at index REG of CODING's coding_attr_iso_initial attribute.  */
 427 #define CODING_ISO_INITIAL(coding, reg)                 \
 428   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 429                      coding_attr_iso_initial),          \
 430                reg)))
 431 /* Value of CODING->safe_charsets[CHARSET_ID]; -1 if CHARSET_ID is
 432    greater than CODING->max_charset_id or that entry is 255.  */
 433 #define CODING_ISO_REQUEST(coding, charset_id)          \
 434   (((charset_id) <= (coding)->max_charset_id            \
 435     ? ((coding)->safe_charsets[charset_id] != 255       \
 436        ? (coding)->safe_charsets[charset_id]            \
 437        : -1)                                            \
 438     : -1))
 439 /* Accessors for the members of CODING->spec.iso_2022.  CODING_ISO_INVOKED_CHARSET
 440    is the designation of the register that CODING_ISO_INVOCATION gives for PLANE.  */
 441 #define CODING_ISO_FLAGS(coding)        \
 442   ((coding)->spec.iso_2022.flags)
 443 #define CODING_ISO_DESIGNATION(coding, reg)     \
 444   ((coding)->spec.iso_2022.current_designation[reg])
 445 #define CODING_ISO_INVOCATION(coding, plane)    \
 446   ((coding)->spec.iso_2022.current_invocation[plane])
 447 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 448   ((coding)->spec.iso_2022.single_shifting)
 449 #define CODING_ISO_BOL(coding)  \
 450   ((coding)->spec.iso_2022.bol)
 451 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 452   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 453 #define CODING_ISO_CMP_STATUS(coding)   \
 454   (&(coding)->spec.iso_2022.cmp_status)
 455 #define CODING_ISO_EXTSEGMENT_LEN(coding)       \
 456   ((coding)->spec.iso_2022.ctext_extended_segment_len)
 457 #define CODING_ISO_EMBEDDED_UTF_8(coding)       \
 458   ((coding)->spec.iso_2022.embedded_utf_8)
 459
 460 /* Control characters of ISO2022.  */
 461                         /* code */      /* function */
 462 #define ISO_CODE_LF     0x0A            /* line-feed */
 463 #define ISO_CODE_CR     0x0D            /* carriage-return */
 464 #define ISO_CODE_SO     0x0E            /* shift-out */
 465 #define ISO_CODE_SI     0x0F            /* shift-in */
 466 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 467 #define ISO_CODE_ESC    0x1B            /* escape */
 468 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 469 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 470 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 471
 472 /* Every 1-byte code of ISO2022 is classified into one of the
 473    following classes.  */
 474 enum iso_code_class_type
 475   {
 476     ISO_control_0,              /* Control codes in the range
 477                                    0x00..0x1F and 0x7F, except for the
 478                                    following 4 codes.  */
 479     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 480     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 481     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 482     ISO_escape,                 /* ISO_CODE_ESC (0x1B) */
 483     ISO_control_1,              /* Control codes in the range
 484                                    0x80..0x9F, except for the
 485                                    following 3 codes.  */
 486     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 487     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 488     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 489     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 490     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 491     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 492     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 493   };
 494
 495 /** The macros CODING_ISO_FLAG_XXX define the flag bits of the
 496     `iso-flags' attribute of an iso2022 coding system.  */
 497
 498 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 499    instead of the correct short-form sequence (e.g. ESC $ A).  */
 500 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 501
 502 /* If set, reset graphic planes and registers at end-of-line to the
 503    initial state.  */
 504 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 505
 506 /* If set, reset graphic planes and registers before any control
 507    characters to the initial state.  */
 508 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 509
 510 /* If set, encode by 7-bit environment.  */
 511 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 512
 513 /* If set, use locking-shift function.  */
 514 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 515
 516 /* If set, use single-shift function.  Overwrite
 517    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 518 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 519
 520 /* If set, use designation escape sequence.  */
 521 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 522
 523 /* If set, produce revision number sequence.  */
 524 #define CODING_ISO_FLAG_REVISION        0x0080
 525
 526 /* If set, produce ISO6429's direction specifying sequence.  */
 527 #define CODING_ISO_FLAG_DIRECTION       0x0100
 528
 529 /* If set, assume designation states are reset at beginning of line on
 530    output.  */
 531 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 532
 533 /* If set, designation sequence should be placed at beginning of line
 534    on output.  */
 535 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 536
 537 /* If set, do not encode unsafe characters on output.  */
 538 #define CODING_ISO_FLAG_SAFE            0x0800
 539
 540 /* If set, extra latin codes (128..159) are accepted as a valid code
 541    on input.  */
 542 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 543
 544 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 545
 546 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 547
 548 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 549
 550 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 551
 552 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 553
 554 /* A character to be produced on output if encoding of the original
 555    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 556 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 557
 558 /* UTF-8 section */
 559 #define CODING_UTF_8_BOM(coding)        \
 560   ((coding)->spec.utf_8_bom)
 561
 562 /* UTF-16 section */
 563 #define CODING_UTF_16_BOM(coding)       \
 564   ((coding)->spec.utf_16.bom)
 565
 566 #define CODING_UTF_16_ENDIAN(coding)    \
 567   ((coding)->spec.utf_16.endian)
 568
 569 #define CODING_UTF_16_SURROGATE(coding) \
 570   ((coding)->spec.utf_16.surrogate)
 571
 572
 573 /* CCL section */
 574 #define CODING_CCL_DECODER(coding)      \
 575   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 576 #define CODING_CCL_ENCODER(coding)      \
 577   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 578 #define CODING_CCL_VALIDS(coding)                                          \
 579   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 580
 581 /* Index for each coding category in `coding_categories' */
 582
 583 enum coding_category
 584   {
 585     coding_category_iso_7,
 586     coding_category_iso_7_tight,
 587     coding_category_iso_8_1,
 588     coding_category_iso_8_2,
 589     coding_category_iso_7_else,
 590     coding_category_iso_8_else,
 591     coding_category_utf_8_auto,
 592     coding_category_utf_8_nosig,
 593     coding_category_utf_8_sig,
 594     coding_category_utf_16_auto,
 595     coding_category_utf_16_be,
 596     coding_category_utf_16_le,
 597     coding_category_utf_16_be_nosig,
 598     coding_category_utf_16_le_nosig,
 599     coding_category_charset,
 600     coding_category_sjis,
 601     coding_category_big5,
 602     coding_category_ccl,
 603     coding_category_emacs_mule,
 604     /* All above are targets of code detection.  */
 605     coding_category_raw_text,
 606     coding_category_undecided,
 607     coding_category_max
 608   };
 609
 610 /* Definitions of flag bits used in detect_coding_XXXX.  */
 611 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 612 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 613 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 614 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 615 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 616 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 617 #define CATEGORY_MASK_UTF_8_AUTO        (1 << coding_category_utf_8_auto)
 618 #define CATEGORY_MASK_UTF_8_NOSIG       (1 << coding_category_utf_8_nosig)
 619 #define CATEGORY_MASK_UTF_8_SIG         (1 << coding_category_utf_8_sig)
 620 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 621 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 622 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 623 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 624 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 625 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 626 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 627 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 628 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 629 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 630 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 631
 632 /* This value is returned if detect_coding_mask () finds nothing other
 633    than ASCII characters.  */
 634 #define CATEGORY_MASK_ANY               \
 635   (CATEGORY_MASK_ISO_7                  \
 636    | CATEGORY_MASK_ISO_7_TIGHT          \
 637    | CATEGORY_MASK_ISO_8_1              \
 638    | CATEGORY_MASK_ISO_8_2              \
 639    | CATEGORY_MASK_ISO_7_ELSE           \
 640    | CATEGORY_MASK_ISO_8_ELSE           \
 641    | CATEGORY_MASK_UTF_8_AUTO           \
 642    | CATEGORY_MASK_UTF_8_NOSIG          \
 643    | CATEGORY_MASK_UTF_8_SIG            \
 644    | CATEGORY_MASK_UTF_16_AUTO          \
 645    | CATEGORY_MASK_UTF_16_BE            \
 646    | CATEGORY_MASK_UTF_16_LE            \
 647    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 648    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 649    | CATEGORY_MASK_CHARSET              \
 650    | CATEGORY_MASK_SJIS                 \
 651    | CATEGORY_MASK_BIG5                 \
 652    | CATEGORY_MASK_CCL                  \
 653    | CATEGORY_MASK_EMACS_MULE)
 654
 655
 656 #define CATEGORY_MASK_ISO_7BIT \
 657   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 658
 659 #define CATEGORY_MASK_ISO_8BIT \
 660   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 661
 662 #define CATEGORY_MASK_ISO_ELSE \
 663   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 664
 665 #define CATEGORY_MASK_ISO_ESCAPE        \
 666   (CATEGORY_MASK_ISO_7                  \
 667    | CATEGORY_MASK_ISO_7_TIGHT          \
 668    | CATEGORY_MASK_ISO_7_ELSE           \
 669    | CATEGORY_MASK_ISO_8_ELSE)
 670
 671 #define CATEGORY_MASK_ISO       \
 672   (  CATEGORY_MASK_ISO_7BIT     \
 673      | CATEGORY_MASK_ISO_8BIT   \
 674      | CATEGORY_MASK_ISO_ELSE)
 675
 676 #define CATEGORY_MASK_UTF_16            \
 677   (CATEGORY_MASK_UTF_16_AUTO            \
 678    | CATEGORY_MASK_UTF_16_BE            \
 679    | CATEGORY_MASK_UTF_16_LE            \
 680    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 681    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 682
 683 #define CATEGORY_MASK_UTF_8     \
 684   (CATEGORY_MASK_UTF_8_AUTO     \
 685    | CATEGORY_MASK_UTF_8_NOSIG  \
 686    | CATEGORY_MASK_UTF_8_SIG)
 687
 688 /* List of symbols `coding-category-xxx' ordered by priority.  This
 689    variable is exposed to Emacs Lisp.  */
 690 static Lisp_Object Vcoding_category_list;
 691
 692 /* Table of coding categories (Lisp symbols).  This variable is for
 693    internal use only.  */
 694 static Lisp_Object Vcoding_category_table;
 695
 696 /* Table of coding-categories ordered by priority.  */
 697 static enum coding_category coding_priorities[coding_category_max];
 698
 699 /* Nth element is a coding context for the coding system bound to the
 700    Nth coding category.  */
 701 static struct coding_system coding_categories[coding_category_max];
 702
 703 /*** Commonly used macros and functions ***/
 704 /* Fallback min/max, used only if not already defined; each argument may be evaluated twice.  */
 705 #ifndef min
 706 #define min(a, b) ((a) < (b) ? (a) : (b))
 707 #endif
 708 #ifndef max
 709 #define max(a, b) ((a) > (b) ? (a) : (b))
 710 #endif
 711 /* Set ATTRS to CODING_ID_ATTRS of CODING's id, and CHARSET_LIST to CODING_ATTR_CHARSET_LIST (ATTRS).  */
 712 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 713   do {                                                  \
 714     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 715     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 716   } while (0)
 717
 718
 719 /* Safely get one byte from the source text pointed by SRC which ends
 720    at SRC_END, and set C to that byte.  If the source is exhausted,
 721    jump to `no_more_source', first recording
 722    CODING_RESULT_INSUFFICIENT_SRC when SRC_BASE < SRC.  If multibytep
 723    is nonzero and the byte has its high bit set: a lead byte 0xC0 or
 724    0xC1 is merged with the next byte into one value; otherwise C is set to
 725    the negated result of string_char and CODING_RESULT_INVALID_SRC is recorded.
 726    Caller must provide: src, src_end, src_base, multibytep, consumed_chars, coding.  */
 727 #define ONE_MORE_BYTE(c)                                \
 728   do {                                                  \
 729     if (src == src_end)                                 \
 730       {                                                 \
 731         if (src_base < src)                             \
 732           record_conversion_result                      \
 733             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 734         goto no_more_source;                            \
 735       }                                                 \
 736     c = *src++;                                         \
 737     if (multibytep && (c & 0x80))                       \
 738       {                                                 \
 739         if ((c & 0xFE) == 0xC0)                         \
 740           c = ((c & 1) << 6) | *src++;                  \
 741         else                                            \
 742           {                                             \
 743             src--;                                      \
 744             c = - string_char (src, &src, NULL);        \
 745             record_conversion_result                    \
 746               (coding, CODING_RESULT_INVALID_SRC);      \
 747           }                                             \
 748       }                                                 \
 749     consumed_chars++;                                   \
 750   } while (0)
 751
 752 /* Safely get two bytes from the source text pointed by SRC which ends
 753    at SRC_END, and set C1 and C2 to those bytes while skipping the
 754    heading multibyte characters.  If there are not enough bytes in the
 755    source, it jumps to `no_more_source'.  If multibytep is nonzero and
 756    a multibyte character is found for C2, set C2 to -1 and leave SRC
 757    just past its first byte.  The caller should declare and set these
 758    variables appropriately in advance:
 759         src, src_end, multibytep
 760    It is intended that this macro is used in detect_coding_utf_16.  */
 761
 762 #define TWO_MORE_BYTES(c1, c2)                          \
 763   do {                                                  \
 764     do {                                                \
 765       if (src == src_end)                               \
 766         goto no_more_source;                            \
 767       c1 = *src++;                                      \
 768       if (multibytep && (c1 & 0x80))                    \
 769         {                                               \
 770           if ((c1 & 0xFE) == 0xC0)                      \
 771             c1 = ((c1 & 1) << 6) | *src++;              \
 772           else                                          \
 773             {                                           \
 774               src += BYTES_BY_CHAR_HEAD (c1) - 1;       \
 775               c1 = -1;                                  \
 776             }                                           \
 777         }                                               \
 778     } while (c1 < 0);                                   \
 779     if (src == src_end)                                 \
 780       goto no_more_source;                              \
 781     c2 = *src++;                                        \
 782     if (multibytep && (c2 & 0x80))                      \
 783       {                                                 \
 784         if ((c2 & 0xFE) == 0xC0)                        \
 785           c2 = ((c2 & 1) << 6) | *src++;                \
 786         else                                            \
 787           c2 = -1;                                      \
 788       }                                                 \
 789   } while (0)
 790
 791
 792 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 793   do {                                                  \
 794     c = *src++;                                         \
 795     if (multibytep && (c & 0x80))                       \
 796       {                                                 \
 797         if ((c & 0xFE) == 0xC0)                         \
 798           c = ((c & 1) << 6) | *src++;                  \
 799         else                                            \
 800           {                                             \
 801             src--;                                      \
 802             c = - string_char (src, &src, NULL);        \
 803             record_conversion_result                    \
 804               (coding, CODING_RESULT_INVALID_SRC);      \
 805           }                                             \
 806       }                                                 \
 807     consumed_chars++;                                   \
 808   } while (0)
 809
 810
 811 /* Store a byte C in the place pointed by DST and increment DST to the
 812    next free point, and increment PRODUCED_CHARS.  The caller should
 813    assure that C is 0..127, and declare and set the variable `dst'
 814    appropriately in advance.
 815 */
 816
 817
 818 #define EMIT_ONE_ASCII_BYTE(c)  \
 819   do {                          \
 820     produced_chars++;           \
 821     *dst++ = (c);               \
 822   } while (0)
 823
 824
 825 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 826
 827 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 828   do {                                  \
 829     produced_chars += 2;                \
 830     *dst++ = (c1), *dst++ = (c2);       \
 831   } while (0)
 832
 833
 834 /* Store a byte C in the place pointed by DST and increment DST to the
 835    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 836    nonzero, store in an appropriate multibyte from.  The caller should
 837    declare and set the variables `dst' and `multibytep' appropriately
 838    in advance.  */
 839
 840 #define EMIT_ONE_BYTE(c)                \
 841   do {                                  \
 842     produced_chars++;                   \
 843     if (multibytep)                     \
 844       {                                 \
 845         int ch = (c);                   \
 846         if (ch >= 0x80)                 \
 847           ch = BYTE8_TO_CHAR (ch);      \
 848         CHAR_STRING_ADVANCE (ch, dst);  \
 849       }                                 \
 850     else                                \
 851       *dst++ = (c);                     \
 852   } while (0)
 853
 854
 855 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 856
 857 #define EMIT_TWO_BYTES(c1, c2)          \
 858   do {                                  \
 859     produced_chars += 2;                \
 860     if (multibytep)                     \
 861       {                                 \
 862         int ch;                         \
 863                                         \
 864         ch = (c1);                      \
 865         if (ch >= 0x80)                 \
 866           ch = BYTE8_TO_CHAR (ch);      \
 867         CHAR_STRING_ADVANCE (ch, dst);  \
 868         ch = (c2);                      \
 869         if (ch >= 0x80)                 \
 870           ch = BYTE8_TO_CHAR (ch);      \
 871         CHAR_STRING_ADVANCE (ch, dst);  \
 872       }                                 \
 873     else                                \
 874       {                                 \
 875         *dst++ = (c1);                  \
 876         *dst++ = (c2);                  \
 877       }                                 \
 878   } while (0)
 879
 880
 881 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 882   do {                                  \
 883     EMIT_ONE_BYTE (c1);                 \
 884     EMIT_TWO_BYTES (c2, c3);            \
 885   } while (0)
 886
 887
 888 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 889   do {                                          \
 890     EMIT_TWO_BYTES (c1, c2);                    \
 891     EMIT_TWO_BYTES (c3, c4);                    \
 892   } while (0)
 893
 894
/* Prototypes for static functions.  */

/* Recording of the result code of a conversion.  */
static void record_conversion_result (struct coding_system *coding,
                                      enum coding_result_code result);

/* UTF-8: detector, decoder and encoder.  */
static int detect_coding_utf_8 (struct coding_system *,
                                struct coding_detection_info *info);
static void decode_coding_utf_8 (struct coding_system *);
static int encode_coding_utf_8 (struct coding_system *);

/* UTF-16.  */
static int detect_coding_utf_16 (struct coding_system *,
                                 struct coding_detection_info *info);
static void decode_coding_utf_16 (struct coding_system *);
static int encode_coding_utf_16 (struct coding_system *);

/* ISO 2022.  */
static int detect_coding_iso_2022 (struct coding_system *,
                                   struct coding_detection_info *info);
static void decode_coding_iso_2022 (struct coding_system *);
static int encode_coding_iso_2022 (struct coding_system *);

/* emacs-mule.  */
static int detect_coding_emacs_mule (struct coding_system *,
                                     struct coding_detection_info *info);
static void decode_coding_emacs_mule (struct coding_system *);
static int encode_coding_emacs_mule (struct coding_system *);

/* Shift-JIS.  */
static int detect_coding_sjis (struct coding_system *,
                               struct coding_detection_info *info);
static void decode_coding_sjis (struct coding_system *);
static int encode_coding_sjis (struct coding_system *);

/* BIG5.  */
static int detect_coding_big5 (struct coding_system *,
                               struct coding_detection_info *info);
static void decode_coding_big5 (struct coding_system *);
static int encode_coding_big5 (struct coding_system *);

/* CCL.  */
static int detect_coding_ccl (struct coding_system *,
                              struct coding_detection_info *info);
static void decode_coding_ccl (struct coding_system *);
static int encode_coding_ccl (struct coding_system *);

/* Raw text (no detector is declared for it here).  */
static void decode_coding_raw_text (struct coding_system *);
static int encode_coding_raw_text (struct coding_system *);

/* Maintenance of the source/destination pointers and growth of the
   destination area.  */
static void coding_set_source (struct coding_system *);
static void coding_set_destination (struct coding_system *);
static void coding_alloc_by_realloc (struct coding_system *, EMACS_INT);
static void coding_alloc_by_making_gap (struct coding_system *,
                                        EMACS_INT, EMACS_INT);
static unsigned char *alloc_destination (struct coding_system *,
                                         EMACS_INT, unsigned char *);

/* Other helpers.  */
static void setup_iso_safe_charsets (Lisp_Object);
static unsigned char *encode_designation_at_bol (struct coding_system *,
                                                 int *, int *,
                                                 unsigned char *);
static int detect_eol (const unsigned char *,
                       EMACS_INT, enum coding_category);
static Lisp_Object adjust_coding_eol_type (struct coding_system *, int);
static void decode_eol (struct coding_system *);
static Lisp_Object get_translation_table (Lisp_Object, int, int *);
static Lisp_Object get_translation (Lisp_Object, int *, int *);
static int produce_chars (struct coding_system *, Lisp_Object, int);
static INLINE void produce_charset (struct coding_system *, int *,
                                    EMACS_INT);
static void produce_annotation (struct coding_system *, EMACS_INT);
static int decode_coding (struct coding_system *);
static INLINE int *handle_composition_annotation (EMACS_INT, EMACS_INT,
                                                  struct coding_system *,
                                                  int *, EMACS_INT *);
static INLINE int *handle_charset_annotation (EMACS_INT, EMACS_INT,
                                              struct coding_system *,
                                              int *, EMACS_INT *);
static void consume_chars (struct coding_system *, Lisp_Object, int);
static int encode_coding (struct coding_system *);
static Lisp_Object make_conversion_work_buffer (int);
static Lisp_Object code_conversion_restore (Lisp_Object);
static INLINE int char_encodable_p (int, Lisp_Object);
static Lisp_Object make_subsidiaries (Lisp_Object);
 969 static Lisp_Object make_subsidiaries (Lisp_Object);
 970
 971 static void
 972 record_conversion_result (struct coding_system *coding,
 973                           enum coding_result_code result)
 974 {
 975   coding->result = result;
 976   switch (result)
 977     {
 978     case CODING_RESULT_INSUFFICIENT_SRC:
 979       Vlast_code_conversion_error = Qinsufficient_source;
 980       break;
 981     case CODING_RESULT_INCONSISTENT_EOL:
 982       Vlast_code_conversion_error = Qinconsistent_eol;
 983       break;
 984     case CODING_RESULT_INVALID_SRC:
 985       Vlast_code_conversion_error = Qinvalid_source;
 986       break;
 987     case CODING_RESULT_INTERRUPT:
 988       Vlast_code_conversion_error = Qinterrupted;
 989       break;
 990     case CODING_RESULT_INSUFFICIENT_MEM:
 991       Vlast_code_conversion_error = Qinsufficient_memory;
 992       break;
 993     case CODING_RESULT_INSUFFICIENT_DST:
 994       /* Don't record this error in Vlast_code_conversion_error
 995          because it happens just temporarily and is resolved when the
 996          whole conversion is finished.  */
 997       break;
 998     case CODING_RESULT_SUCCESS:
 999       break;
1000     default:
1001       Vlast_code_conversion_error = intern ("Unknown error");
1002     }
1003 }
1004
/* Decode CODE of CHARSET into C with DECODE_CHAR, keeping the
   pointers SRC, SRC_BASE and SRC_END into the source text valid.
   decode_char may load a charset map, and loading a charset map
   allocates large structures, which could cause relocation of
   buffers.  When charset_map_loaded reports that this happened, the
   source address of CODING is refreshed and the three pointers are
   moved by the same distance.  */
#define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
  do {                                                                       \
    charset_map_loaded = 0;                                                  \
    c = DECODE_CHAR (charset, code);                                         \
    if (charset_map_loaded)                                                  \
      {                                                                      \
        const unsigned char *old_source = coding->source;                    \
        EMACS_INT shift;                                                     \
                                                                             \
        coding_set_source (coding);                                          \
        shift = coding->source - old_source;                                 \
        src = src + shift;                                                   \
        src_base = src_base + shift;                                         \
        src_end = src_end + shift;                                           \
      }                                                                      \
  } while (0)
1025
1026
/* Make sure that there is room for more than BYTES bytes between dst
   and dst_end.  If there is not, enlarge coding->destination by BYTES
   plus the number of elements still pending in charbuf, and update
   dst and dst_end to point into the new area.  We don't have to take
   care of coding->source which will be relocated.  It is handled by
   calling coding_set_source in encode_coding.  */

#define ASSURE_DESTINATION(bytes)                                       \
  do {                                                                  \
    if (! (dst + (bytes) < dst_end))                                    \
      {                                                                 \
        int more_bytes = charbuf_end - charbuf + (bytes);               \
                                                                        \
        dst = alloc_destination (coding, more_bytes, dst);              \
        dst_end = coding->destination + coding->dst_bytes;              \
      }                                                                 \
  } while (0)
1042
1043
/* Store multibyte form of the character C in P, and advance P to the
   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
   never calls MAYBE_UNIFY_CHAR.

   The 1- to 5-byte form is chosen by comparing C with MAX_1_BYTE_CHAR
   through MAX_5_BYTE_CHAR; any larger value is handed to BYTE8_STRING
   after subtracting 0x3FFF80.

   C is parenthesized at every use so that an expression argument
   (e.g. `a | b') is shifted and masked as a whole.  C and P are still
   evaluated more than once, so neither may have side effects, and P
   must be a modifiable lvalue.  */

#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)              \
  do {                                                  \
    if ((c) <= MAX_1_BYTE_CHAR)                         \
      *(p)++ = (c);                                     \
    else if ((c) <= MAX_2_BYTE_CHAR)                    \
      *(p)++ = (0xC0 | ((c) >> 6)),                     \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_3_BYTE_CHAR)                    \
      *(p)++ = (0xE0 | ((c) >> 12)),                    \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_4_BYTE_CHAR)                    \
      *(p)++ = (0xF0 | ((c) >> 18)),                    \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else if ((c) <= MAX_5_BYTE_CHAR)                    \
      *(p)++ = 0xF8,                                    \
        *(p)++ = (0x80 | (((c) >> 18) & 0x0F)),         \
        *(p)++ = (0x80 | (((c) >> 12) & 0x3F)),         \
        *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
        *(p)++ = (0x80 | ((c) & 0x3F));                 \
    else                                                \
      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);          \
  } while (0)
1073
1074
/* Yield the character code of the character whose multibyte form
   starts at P, and advance P past that form.  This is like
   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.

   The length (1 to 5 bytes) is chosen from the high bits of the
   first byte.  A two-byte form whose first byte is below 0xC2 has
   0x3FFF80 or'ed into the result.  P is evaluated several times and
   must be a modifiable lvalue without side effects.  */

#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                                 \
  (((p)[0] & 0x80) == 0                                                 \
   ? *(p)++                                                             \
   : ((p)[0] & 0x20) == 0                                               \
   ? ((p) += 2,                                                         \
      (((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)                \
       | (((p)[-2] & 0x1F) << 6)                                        \
       | ((p)[-1] & 0x3F)))                                             \
   : ((p)[0] & 0x10) == 0                                               \
   ? ((p) += 3,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x0F) << 12)))                                     \
   : ((p)[0] & 0x08) == 0                                               \
   ? ((p) += 4,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0xF) << 18)))                                      \
   : ((p) += 5,                                                         \
      (((p)[-1] & 0x3F)                                                 \
       | (((p)[-2] & 0x3F) << 6)                                        \
       | (((p)[-3] & 0x3F) << 12)                                       \
       | (((p)[-4] & 0x3F) << 18))))
1103
1104
1105 static void
1106 coding_set_source (struct coding_system *coding)
1107 {
1108   if (BUFFERP (coding->src_object))
1109     {
1110       struct buffer *buf = XBUFFER (coding->src_object);
1111
1112       if (coding->src_pos < 0)
1113         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1114       else
1115         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1116     }
1117   else if (STRINGP (coding->src_object))
1118     {
1119       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1120     }
1121   else
1122     /* Otherwise, the source is C string and is never relocated
1123        automatically.  Thus we don't have to update anything.  */
1124     ;
1125 }
1126
1127 static void
1128 coding_set_destination (struct coding_system *coding)
1129 {
1130   if (BUFFERP (coding->dst_object))
1131     {
1132       if (coding->src_pos < 0)
1133         {
1134           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1135           coding->dst_bytes = (GAP_END_ADDR
1136                                - (coding->src_bytes - coding->consumed)
1137                                - coding->destination);
1138         }
1139       else
1140         {
1141           /* We are sure that coding->dst_pos_byte is before the gap
1142              of the buffer. */
1143           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1144                                  + coding->dst_pos_byte - BEG_BYTE);
1145           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1146                                - coding->destination);
1147         }
1148     }
1149   else
1150     /* Otherwise, the destination is C string and is never relocated
1151        automatically.  Thus we don't have to update anything.  */
1152     ;
1153 }
1154
1155
1156 static void
1157 coding_alloc_by_realloc (struct coding_system *coding, EMACS_INT bytes)
1158 {
1159   coding->destination = (unsigned char *) xrealloc (coding->destination,
1160                                                     coding->dst_bytes + bytes);
1161   coding->dst_bytes += bytes;
1162 }
1163
1164 static void
1165 coding_alloc_by_making_gap (struct coding_system *coding,
1166                             EMACS_INT gap_head_used, EMACS_INT bytes)
1167 {
1168   if (EQ (coding->src_object, coding->dst_object))
1169     {
1170       /* The gap may contain the produced data at the head and not-yet
1171          consumed data at the tail.  To preserve those data, we at
1172          first make the gap size to zero, then increase the gap
1173          size.  */
1174       EMACS_INT add = GAP_SIZE;
1175
1176       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1177       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1178       make_gap (bytes);
1179       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1180       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1181     }
1182   else
1183     {
1184       Lisp_Object this_buffer;
1185
1186       this_buffer = Fcurrent_buffer ();
1187       set_buffer_internal (XBUFFER (coding->dst_object));
1188       make_gap (bytes);
1189       set_buffer_internal (XBUFFER (this_buffer));
1190     }
1191 }
1192
1193
1194 static unsigned char *
1195 alloc_destination (struct coding_system *coding, EMACS_INT nbytes,
1196                    unsigned char *dst)
1197 {
1198   EMACS_INT offset = dst - coding->destination;
1199
1200   if (BUFFERP (coding->dst_object))
1201     {
1202       struct buffer *buf = XBUFFER (coding->dst_object);
1203
1204       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1205     }
1206   else
1207     coding_alloc_by_realloc (coding, nbytes);
1208   coding_set_destination (coding);
1209   dst = coding->destination + offset;
1210   return dst;
1211 }
1212
1213 /** Macros for annotations.  */
1214
1215 /* An annotation data is stored in the array coding->charbuf in this
1216    format:
1217      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1218    LENGTH is the number of elements in the annotation.
1219    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1220    NCHARS is the number of characters in the text annotated.
1221
   The format of the following elements depends on ANNOTATION_MASK.
1223
   In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
   follow:
1226      ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1227
1228    NBYTES is the number of bytes specified in the header part of
1229    old-style emacs-mule encoding, or 0 for the other kind of
1230    composition.
1231
1232    METHOD is one of enum composition_method.
1233
   Optional COMPOSITION-COMPONENTS are characters and composition
   rules.
1236
1237    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1238    follows.
1239
1240    If ANNOTATION_MASK is 0, this annotation is just a space holder to
1241    recover from an invalid annotation, and should be skipped by
1242    produce_annotation.  */
1243
/* Maximum length of the header of annotation data.  The longest
   header written by the macros below is the five elements stored by
   ADD_COMPOSITION_DATA.  */
#define MAX_ANNOTATION_LENGTH 5
1246
/* Store the three common leading elements of an annotation at BUF,
   advancing BUF past them: the negated length LEN, the annotation
   kind MASK, and NCHARS.  Also set the `annotated' flag of `coding',
   which the caller must have in scope.

   The expansion deliberately does not end in a semicolon, so that the
   macro call followed by `;' is exactly one statement and can be used
   as the body of an `if' that has an `else'.  */
#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
  do {                                                  \
    *(buf)++ = -(len);                                  \
    *(buf)++ = (mask);                                  \
    *(buf)++ = (nchars);                                \
    coding->annotated = 1;                              \
  } while (0)
1254
/* Store a composition annotation at BUF, advancing BUF: the common
   header written by ADD_ANNOTATION_DATA (with length 5 and kind
   CODING_ANNOTATE_COMPOSITION_MASK, for NCHARS characters), then
   NBYTES and METHOD.  The arguments are parenthesized at each use so
   that expression arguments such as `*pp' for BUF work as intended.
   BUF is evaluated several times.  */
#define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method)                   \
  do {                                                                      \
    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
    *(buf)++ = (nbytes);                                                    \
    *(buf)++ = (method);                                                    \
  } while (0)
1261
1262
/* Store a charset annotation at BUF, advancing BUF: the common
   header written by ADD_ANNOTATION_DATA (with length 4 and kind
   CODING_ANNOTATE_CHARSET_MASK, for NCHARS characters), then the
   charset ID.  The arguments are parenthesized at each use so that
   expression arguments such as `*pp' for BUF work as intended.  BUF
   is evaluated several times.  */
#define ADD_CHARSET_DATA(buf, nchars, id)                               \
  do {                                                                  \
    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
    *(buf)++ = (id);                                                    \
  } while (0)
1268
1269 \f
1270 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1271
1272
1273
1274 \f
1275 /*** 3. UTF-8 ***/
1276
1277 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1278    Check if a text is encoded in UTF-8.  If it is, return 1, else
1279    return 0.  */
1280
/* Predicates classifying one octet C by its high bits.  */
/* C is below 0x80 (a one-octet code).  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* C has the form 10xxxxxx (a trailing octet).  */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
/* C has the form 110xxxxx (leads a two-octet sequence).  */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* C has the form 1110xxxx (leads a three-octet sequence).  */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* C has the form 11110xxx (leads a four-octet sequence).  */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* C has the form 111110xx (leads a five-octet sequence).  */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)

/* Character code of the byte order mark, and the three octets that
   are its UTF-8 encoding (the UTF-8 signature).  */
#define UTF_BOM 0xFEFF
#define UTF_8_BOM_1 0xEF
#define UTF_8_BOM_2 0xBB
#define UTF_8_BOM_3 0xBF
1292
/* Detector for the UTF-8 categories.  Scan the source of CODING,
   starting after its CODING->head_ascii leading bytes, and record the
   outcome in DETECT_INFO.

   Return 0, with CATEGORY_MASK_UTF_8 added to DETECT_INFO->rejected,
   if a leading octet is not followed by the trailing octets its bit
   pattern calls for, or if the source ends inside a sequence while
   CODING_MODE_LAST_BLOCK is set.  Otherwise return 1, after marking
   the SIG/NOSIG categories as found or rejected according to whether
   the source starts with the UTF-8 signature and whether any
   multi-octet sequence was seen.  Only the bit patterns are checked
   here; the decoded values are not.  */
static int
detect_coding_utf_8 (struct coding_system *coding,
                     struct coding_detection_info *detect_info)
{
  const unsigned char *src = coding->source, *src_base;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  int multibytep = coding->src_multibyte;
  /* Referenced by ONE_MORE_BYTE.  */
  int consumed_chars = 0;
  /* Nonzero if the source starts with the three signature octets.  */
  int bom_found = 0;
  /* Nonzero once a multi-octet sequence has been seen.  */
  int found = 0;

  detect_info->checked |= CATEGORY_MASK_UTF_8;
  /* A coding system of this category is always ASCII compatible.  */
  src += coding->head_ascii;

  /* Each iteration examines one sequence.  The loop is left by `break'
     on a malformed sequence, or by the jump to no_more_source that
     ONE_MORE_BYTE makes when the source is used up.  */
  while (1)
    {
      int c, c1, c2, c3, c4;

      /* Start of the current sequence; used below to tell whether the
         source ended in the middle of a sequence.  */
      src_base = src;
      ONE_MORE_BYTE (c);
      /* A negative value (a multibyte character delivered by
         ONE_MORE_BYTE) and a one-octet code need no further check.  */
      if (c < 0 || UTF_8_1_OCTET_P (c))
        continue;
      ONE_MORE_BYTE (c1);
      if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
        break;
      if (UTF_8_2_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c2);
      if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
        break;
      if (UTF_8_3_OCTET_LEADING_P (c))
        {
          found = 1;
          /* The signature counts only at the very start of the
             source.  */
          if (src_base == coding->source
              && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
            bom_found = 1;
          continue;
        }
      ONE_MORE_BYTE (c3);
      if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
        break;
      if (UTF_8_4_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      ONE_MORE_BYTE (c4);
      if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
        break;
      if (UTF_8_5_OCTET_LEADING_P (c))
        {
          found = 1;
          continue;
        }
      /* C is not a leading octet of any accepted length.  */
      break;
    }
  /* Reached only by `break': a malformed sequence was seen.  */
  detect_info->rejected |= CATEGORY_MASK_UTF_8;
  return 0;

 no_more_source:
  /* The source ended inside a sequence; that is an error only if no
     more source is to come.  */
  if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8;
      return 0;
    }
  if (bom_found)
    {
      /* The first character 0xFEFF doesn't necessarily mean a BOM.  */
      detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
    }
  else
    {
      detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
      if (found)
        detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
    }
  return 1;
}
1375
1376
/* Decoder for UTF-8.  Read bytes from CODING->source, starting at
   offset CODING->consumed, and append the decoded character codes to
   CODING->charbuf until the source is used up or the charbuf is full.
   On return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used describe what has been completely processed;
   a sequence cut short by the end of the source is left unconsumed.

   A byte that does not start an acceptable sequence (malformed,
   overlong, a surrogate, or above MAX_CHAR) is stored by itself, as
   is if ASCII and through BYTE8_TO_CHAR otherwise, and counted in
   CODING->errors.  */
static void
decode_coding_utf_8 (struct coding_system *coding)
{
  const unsigned char *src = coding->source + coding->consumed;
  const unsigned char *src_end = coding->source + coding->src_bytes;
  /* Start of the sequence being decoded; what precedes it is
     reported as consumed.  */
  const unsigned char *src_base;
  int *charbuf = coding->charbuf + coding->charbuf_used;
  int *charbuf_end = coding->charbuf + coding->charbuf_size;
  /* CONSUMED_CHARS is advanced by ONE_MORE_BYTE;
     CONSUMED_CHARS_BASE is its value at SRC_BASE.  */
  int consumed_chars = 0, consumed_chars_base = 0;
  int multibytep = coding->src_multibyte;
  enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
  Lisp_Object attr, charset_list;
  int eol_crlf =
    !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The byte already read after a CR when EOL_CRLF is nonzero, or -1
     if there is none pending.  */
  int byte_after_cr = -1;

  CODING_GET_INFO (coding, attr, charset_list);

  /* Unless the coding system is known to have no signature, skip the
     three signature octets if the source starts with them; otherwise
     go back to SRC_BASE.
     NOTE(review): on the paths that go back, consumed_chars keeps
     the increments made by ONE_MORE_BYTE -- confirm that this is
     intended.  */
  if (bom != utf_without_bom)
    {
      int c1, c2, c3;

      src_base = src;
      ONE_MORE_BYTE (c1);
      if (! UTF_8_3_OCTET_LEADING_P (c1))
        src = src_base;
      else
        {
          ONE_MORE_BYTE (c2);
          if (! UTF_8_EXTRA_OCTET_P (c2))
            src = src_base;
          else
            {
              ONE_MORE_BYTE (c3);
              if (! UTF_8_EXTRA_OCTET_P (c3))
                src = src_base;
              else
                {
                  if ((c1 != UTF_8_BOM_1)
                      || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
                    src = src_base;
                  else
                    CODING_UTF_8_BOM (coding) = utf_without_bom;
                }
            }
        }
    }
  /* Whatever the outcome, the check above is not repeated for this
     coding system.  */
  CODING_UTF_8_BOM (coding) = utf_without_bom;

  while (1)
    {
      int c, c1, c2, c3, c4, c5;

      src_base = src;
      consumed_chars_base = consumed_chars;

      if (charbuf >= charbuf_end)
        {
          /* Give back the byte that was read ahead after a CR.  */
          if (byte_after_cr >= 0)
            src_base--;
          break;
        }

      /* Take the pending byte after a CR first, if any.  */
      if (byte_after_cr >= 0)
        c1 = byte_after_cr, byte_after_cr = -1;
      else
        ONE_MORE_BYTE (c1);
      if (c1 < 0)
        {
          /* ONE_MORE_BYTE delivered a negated multibyte character.  */
          c = - c1;
        }
      else if (UTF_8_1_OCTET_P (c1))
        {
          /* After a CR, read the next byte ahead.  If the source ends
             here, the jump to no_more_source leaves the CR
             unconsumed.  */
          if (eol_crlf && c1 == '\r')
            ONE_MORE_BYTE (byte_after_cr);
          c = c1;
        }
      else
        {
          ONE_MORE_BYTE (c2);
          if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
            goto invalid_code;
          if (UTF_8_2_OCTET_LEADING_P (c1))
            {
              c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
              /* Reject overlong sequences here and below.  Encoders
                 producing them are incorrect, they can be misleading,
                 and they mess up read/write invariance.  */
              if (c < 128)
                goto invalid_code;
            }
          else
            {
              ONE_MORE_BYTE (c3);
              if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
                goto invalid_code;
              if (UTF_8_3_OCTET_LEADING_P (c1))
                {
                  c = (((c1 & 0xF) << 12)
                       | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
                  if (c < 0x800
                      || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
                    goto invalid_code;
                }
              else
                {
                  ONE_MORE_BYTE (c4);
                  if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
                    goto invalid_code;
                  if (UTF_8_4_OCTET_LEADING_P (c1))
                    {
                    c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
                         | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
                    if (c < 0x10000)
                      goto invalid_code;
                    }
                  else
                    {
                      ONE_MORE_BYTE (c5);
                      if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
                        goto invalid_code;
                      if (UTF_8_5_OCTET_LEADING_P (c1))
                        {
                          c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
                               | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
                               | (c5 & 0x3F));
                          /* Reject values above MAX_CHAR and overlong
                             five-octet sequences.  */
                          if ((c > MAX_CHAR) || (c < 0x200000))
                            goto invalid_code;
                        }
                      else
                        goto invalid_code;
                    }
                }
            }
        }

      *charbuf++ = c;
      continue;

    invalid_code:
      /* Go back to the start of the sequence and store its first byte
         alone; decoding resumes at the byte after it.  */
      src = src_base;
      consumed_chars = consumed_chars_base;
      ONE_MORE_BYTE (c);
      *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
      coding->errors++;
    }

 no_more_source:
  /* Report only what precedes the last sequence start.  */
  coding->consumed_char += consumed_chars_base;
  coding->consumed = src_base - coding->source;
  coding->charbuf_used = charbuf - coding->charbuf;
}
1529
1530
/* Encode the character codes held in CODING->charbuf as UTF-8 and
   append the resulting bytes to CODING->destination, starting at
   offset CODING->produced.

   If CODING_UTF_8_BOM (CODING) is utf_with_bom, the three BOM bytes
   are written first and the setting is switched to utf_without_bom,
   so a later call on the same CODING does not write the BOM again.
   A character for which CHAR_BYTE8_P holds is written as the single
   byte CHAR_TO_BYTE8 (c); every other character is written via
   CHAR_STRING_ADVANCE_NO_UNIFY.

   Before returning, the result is recorded as CODING_RESULT_SUCCESS
   and CODING->produced / CODING->produced_char are updated.  The
   return value is always 0.  */
1531 static int
1532 encode_coding_utf_8 (struct coding_system *coding)
1533 {
1534   int multibytep = coding->dst_multibyte;
1535   int *charbuf = coding->charbuf;
1536   int *charbuf_end = charbuf + coding->charbuf_used;
1537   unsigned char *dst = coding->destination + coding->produced;
1538   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1539   int produced_chars = 0;
1540   int c;
1541
1542   if (CODING_UTF_8_BOM (coding) == utf_with_bom)
1543     {
1544       ASSURE_DESTINATION (3);
1545       EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
      /* Remember that the BOM has been written.  */
1546       CODING_UTF_8_BOM (coding) = utf_without_bom;
1547     }
1548
1549   if (multibytep)
1550     {
      /* Multibyte destination: every byte goes through EMIT_ONE_BYTE.
         PRODUCED_CHARS is not touched explicitly in this branch;
         presumably the EMIT_* macros advance it -- confirm.
         NOTE(review): the factor 2 presumably allows for
         EMIT_ONE_BYTE storing more than one byte per call -- confirm
         against the macro definition.  */
1551       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1552
1553       while (charbuf < charbuf_end)
1554         {
1555           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1556
1557           ASSURE_DESTINATION (safe_room);
1558           c = *charbuf++;
1559           if (CHAR_BYTE8_P (c))
1560             {
1561               c = CHAR_TO_BYTE8 (c);
1562               EMIT_ONE_BYTE (c);
1563             }
1564           else
1565             {
              /* Build the byte sequence of C in STR (PEND ends up just
                 past it), then copy it out one byte at a time.  */
1566               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1567               for (p = str; p < pend; p++)
1568                 EMIT_ONE_BYTE (*p);
1569             }
1570         }
1571     }
1572   else
1573     {
      /* Unibyte destination: bytes are stored directly through DST.  */
1574       int safe_room = MAX_MULTIBYTE_LENGTH;
1575
1576       while (charbuf < charbuf_end)
1577         {
1578           ASSURE_DESTINATION (safe_room);
1579           c = *charbuf++;
1580           if (CHAR_BYTE8_P (c))
1581             *dst++ = CHAR_TO_BYTE8 (c);
1582           else
1583             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
          /* NOTE(review): this counts one per character even though
             CHAR_STRING_ADVANCE_NO_UNIFY may have advanced DST by
             more than one byte; confirm that produced_char is meant
             to count characters rather than bytes for a unibyte
             destination.  */
1584           produced_chars++;
1585         }
1586     }
1587   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1588   coding->produced_char += produced_chars;
1589   coding->produced = dst - coding->destination;
1590   return 0;
1591 }
1592
1593
1594 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1595    Check if a text is encoded in one of UTF-16 based coding systems.
1596    If it is, return 1, else return 0.  */
1597
/* Nonzero if the bits of VAL selected by 0xFC00 equal 0xD800; for a
   16-bit code unit this means VAL is in 0xD800..0xDBFF.  */
1598 #define UTF_16_HIGH_SURROGATE_P(val) \
1599   (((val) & 0xFC00) == 0xD800)
1600
/* Nonzero if the bits of VAL selected by 0xFC00 equal 0xDC00; for a
   16-bit code unit this means VAL is in 0xDC00..0xDFFF.  */
1601 #define UTF_16_LOW_SURROGATE_P(val) \
1602   (((val) & 0xFC00) == 0xDC00)
1603
/* Nonzero if VAL is 0xFFFE, 0xFFFF, or satisfies
   UTF_16_LOW_SURROGATE_P.  VAL is evaluated more than once.  */
1604 #define UTF_16_INVALID_P(val)   \
1605   (((val) == 0xFFFE)            \
1606    || ((val) == 0xFFFF)         \
1607    || UTF_16_LOW_SURROGATE_P (val))
1608
1609
/* Examine the source text of CODING for UTF-16 and record the outcome
   in DETECT_INFO.  CATEGORY_MASK_UTF_16 is always added to
   DETECT_INFO->checked.

   Outcomes visible in this function:
   - last block with an odd CODING->src_chars: every UTF-16 category
     is rejected; returns 0.
   - the first two bytes are FF FE (resp. FE FF): the LE (resp. BE)
     and AUTO categories are marked found, the opposite-endian and
     both NOSIG categories are rejected; returns 1.
   - the second byte is reported as negative: every UTF-16 category
     is rejected; returns 0.
   - otherwise only the two NOSIG categories stay candidates and the
     rest of the source is scanned with a byte-dispersion heuristic.

   The label `no_more_source' (returns 1) has no explicit goto here;
   presumably TWO_MORE_BYTES jumps to it when the source is
   exhausted -- confirm against the macro.  */
1610 static int
1611 detect_coding_utf_16 (struct coding_system *coding,
1612                       struct coding_detection_info *detect_info)
1613 {
1614   const unsigned char *src = coding->source, *src_base = src;
1615   const unsigned char *src_end = coding->source + coding->src_bytes;
1616   int multibytep = coding->src_multibyte;
1617   int consumed_chars = 0;
1618   int c1, c2;
1619
1620   detect_info->checked |= CATEGORY_MASK_UTF_16;
1621   if (coding->mode & CODING_MODE_LAST_BLOCK
1622       && (coding->src_chars & 1))
1623     {
1624       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1625       return 0;
1626     }
1627
1628   TWO_MORE_BYTES (c1, c2);
1629   if ((c1 == 0xFF) && (c2 == 0xFE))
1630     {
      /* Signature bytes FF FE: little endian with signature.  */
1631       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1632                              | CATEGORY_MASK_UTF_16_AUTO);
1633       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1634                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1635                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1636     }
1637   else if ((c1 == 0xFE) && (c2 == 0xFF))
1638     {
      /* Signature bytes FE FF: big endian with signature.  */
1639       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1640                              | CATEGORY_MASK_UTF_16_AUTO);
1641       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1642                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1643                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1644     }
1645   else if (c2 < 0)
1646     {
1647       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1648       return 0;
1649     }
1650   else
1651     {
1652       /* We check the dispersion of Eth and Oth bytes where E is even and
1653          O is odd.  If both are high, we assume binary data.*/
      /* E[b] (resp. O[b]) becomes 1 once byte value b has been seen
         as the first (resp. second) byte of a pair; E_NUM and O_NUM
         count the distinct values seen so far.  */
1654       unsigned char e[256], o[256];
1655       unsigned e_num = 1, o_num = 1;
1656
1657       memset (e, 0, 256);
1658       memset (o, 0, 256);
      /* NOTE(review): only C2 was tested for a negative value before
         C1 is used as an array index; this assumes TWO_MORE_BYTES
         never yields a negative C1 -- TODO confirm.  */
1659       e[c1] = 1;
1660       o[c2] = 1;
1661
      /* No signature was seen, so the signature-dependent categories
         are ruled out.  */
1662       detect_info->rejected |= (CATEGORY_MASK_UTF_16_AUTO
1663                                 |CATEGORY_MASK_UTF_16_BE
1664                                 | CATEGORY_MASK_UTF_16_LE);
1665
      /* Scan until every UTF-16 category has been rejected.  */
1666       while ((detect_info->rejected & CATEGORY_MASK_UTF_16)
1667              != CATEGORY_MASK_UTF_16)
1668         {
1669           TWO_MORE_BYTES (c1, c2);
1670           if (c2 < 0)
1671             break;
1672           if (! e[c1])
1673             {
1674               e[c1] = 1;
1675               e_num++;
              /* 128 or more distinct first bytes: reject big endian
                 without signature.  */
1676               if (e_num >= 128)
1677                 detect_info->rejected |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1678             }
1679           if (! o[c2])
1680             {
1681               o[c2] = 1;
1682               o_num++;
              /* 128 or more distinct second bytes: reject little
                 endian without signature.  */
1683               if (o_num >= 128)
1684                 detect_info->rejected |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1685             }
1686         }
1687       return 0;
1688     }
1689
1690  no_more_source:
1691   return 1;
1692 }
1693
/* Decode UTF-16 bytes from CODING->source, starting at offset
   CODING->consumed, into character codes appended to CODING->charbuf
   after the CODING->charbuf_used elements already there.

   Byte order comes from CODING_UTF_16_ENDIAN (CODING).  A pending high
   surrogate is read from and written back to
   CODING_UTF_16_SURROGATE (CODING), so that state outlives one call.
   On exit CODING->consumed_char, CODING->consumed and
   CODING->charbuf_used are updated; the consumed counts are taken
   from the values saved at the start of the last loop iteration
   (SRC_BASE, CONSUMED_CHARS_BASE).

   The label `no_more_source' has no explicit goto here; presumably
   ONE_MORE_BYTE jumps to it when the source is exhausted -- confirm
   against the macro.  */
1694 static void
1695 decode_coding_utf_16 (struct coding_system *coding)
1696 {
1697   const unsigned char *src = coding->source + coding->consumed;
1698   const unsigned char *src_end = coding->source + coding->src_bytes;
1699   const unsigned char *src_base;
1700   int *charbuf = coding->charbuf + coding->charbuf_used;
1701   /* We may produce at most 3 chars in one loop.  */
1702   int *charbuf_end = coding->charbuf + coding->charbuf_size - 2;
1703   int consumed_chars = 0, consumed_chars_base = 0;
1704   int multibytep = coding->src_multibyte;
1705   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1706   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1707   int surrogate = CODING_UTF_16_SURROGATE (coding);
1708   Lisp_Object attr, charset_list;
  /* Nonzero when EOL conversion is enabled and the EOL type of this
     coding system is Qdos.  */
1709   int eol_crlf =
1710     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
  /* The two bytes read ahead after a CR, or -1 when none are
     pending.  */
1711   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1712
1713   CODING_GET_INFO (coding, attr, charset_list);
1714
1715   if (bom == utf_with_bom)
1716     {
1717       int c, c1, c2;
1718
1719       src_base = src;
1720       ONE_MORE_BYTE (c1);
1721       ONE_MORE_BYTE (c2);
      /* The two bytes are always combined first-byte-high here, so a
         little-endian signature (FF FE) reads as 0xFFFE.  */
1722       c = (c1 << 8) | c2;
1723
1724       if (endian == utf_16_big_endian
1725           ? c != 0xFEFF : c != 0xFFFE)
1726         {
1727           /* The first two bytes are not BOM.  Treat them as bytes
1728              for a normal character.  */
          /* NOTE(review): SRC is rewound but CONSUMED_CHARS is not;
             confirm whether ONE_MORE_BYTE advanced it.  */
1729           src = src_base;
1730           coding->errors++;
1731         }
      /* Do not look for a BOM again on later calls.  */
1732       CODING_UTF_16_BOM (coding) = utf_without_bom;
1733     }
1734   else if (bom == utf_detect_bom)
1735     {
1736       /* We have already tried to detect BOM and failed in
1737          detect_coding.  */
1738       CODING_UTF_16_BOM (coding) = utf_without_bom;
1739     }
1740
1741   while (1)
1742     {
1743       int c, c1, c2;
1744
      /* Remember where this code unit starts so that an incomplete
         one can be left unconsumed.  */
1745       src_base = src;
1746       consumed_chars_base = consumed_chars;
1747
1748       if (charbuf >= charbuf_end)
1749         {
          /* Two bytes read ahead after a CR have not been decoded
             yet; report them as not consumed.  */
1750           if (byte_after_cr1 >= 0)
1751             src_base -= 2;
1752           break;
1753         }
1754
      /* Take the first byte from the read-ahead if there is one.  */
1755       if (byte_after_cr1 >= 0)
1756         c1 = byte_after_cr1, byte_after_cr1 = -1;
1757       else
1758         ONE_MORE_BYTE (c1);
1759       if (c1 < 0)
1760         {
          /* A negative value is stored negated; presumably
             ONE_MORE_BYTE reports a character that is not a raw byte
             this way -- confirm.  */
1761           *charbuf++ = -c1;
1762           continue;
1763         }
1764       if (byte_after_cr2 >= 0)
1765         c2 = byte_after_cr2, byte_after_cr2 = -1;
1766       else
1767         ONE_MORE_BYTE (c2);
1768       if (c2 < 0)
1769         {
          /* C1 has no partner byte: store it as a raw byte, followed
             by the negated C2.  */
1770           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1771           *charbuf++ = -c2;
1772           continue;
1773         }
1774       c = (endian == utf_16_big_endian
1775            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1776
1777       if (surrogate)
1778         {
          /* A high surrogate is pending.  */
1779           if (! UTF_16_LOW_SURROGATE_P (c))
1780             {
              /* Unpaired high surrogate: store its two bytes in source
                 byte order and count an error.
                 NOTE(review): C1 and C2 are stored as plain values,
                 without the BYTE8_TO_CHAR mapping used for a lone
                 byte above -- confirm this is intended.  */
1781               if (endian == utf_16_big_endian)
1782                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1783               else
1784                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1785               *charbuf++ = c1;
1786               *charbuf++ = c2;
1787               coding->errors++;
              /* NOTE(review): when C is not a high surrogate,
                 SURROGATE keeps its old nonzero value -- confirm
                 whether it should be cleared here.  */
1788               if (UTF_16_HIGH_SURROGATE_P (c))
1789                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1790               else
1791                 *charbuf++ = c;
1792             }
1793           else
1794             {
              /* Combine the pending high surrogate with low surrogate
                 C into one code at or above 0x10000.  */
1795               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1796               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1797               *charbuf++ = 0x10000 + c;
1798             }
1799         }
1800       else
1801         {
1802           if (UTF_16_HIGH_SURROGATE_P (c))
1803             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1804           else
1805             {
1806               if (eol_crlf && c == '\r')
1807                 {
                  /* Read the next two bytes before the CR is stored;
                     they are decoded by the next iteration.  */
1808                   ONE_MORE_BYTE (byte_after_cr1);
1809                   ONE_MORE_BYTE (byte_after_cr2);
1810                 }
1811               *charbuf++ = c;
1812             }
1813         }
1814     }
1815
1816  no_more_source:
1817   coding->consumed_char += consumed_chars_base;
1818   coding->consumed = src_base - coding->source;
1819   coding->charbuf_used = charbuf - coding->charbuf;
1820 }
1821
/* Encode the character codes held in CODING->charbuf as UTF-16 and
   append the resulting bytes to CODING->destination, starting at
   offset CODING->produced.

   Byte order follows CODING_UTF_16_ENDIAN (CODING).  Unless
   CODING_UTF_16_BOM (CODING) is utf_without_bom, a two-byte signature
   is written first and the setting is switched to utf_without_bom.
   A code above MAX_UNICODE_CHAR is replaced by CODING->default_char.
   A code below 0x10000 is written as two bytes, any other code as a
   surrogate pair (four bytes).

   Before returning, the result is recorded as CODING_RESULT_SUCCESS
   and CODING->produced / CODING->produced_char are updated.  The
   return value is always 0.  PRODUCED_CHARS is not touched explicitly
   here; presumably the EMIT_* macros advance it -- confirm.  */
1822 static int
1823 encode_coding_utf_16 (struct coding_system *coding)
1824 {
1825   int multibytep = coding->dst_multibyte;
1826   int *charbuf = coding->charbuf;
1827   int *charbuf_end = charbuf + coding->charbuf_used;
1828   unsigned char *dst = coding->destination + coding->produced;
1829   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1830   int safe_room = 8;
1831   enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
1832   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1833   int produced_chars = 0;
1834   Lisp_Object attrs, charset_list;
1835   int c;
1836
1837   CODING_GET_INFO (coding, attrs, charset_list);
1838
1839   if (bom != utf_without_bom)
1840     {
1841       ASSURE_DESTINATION (safe_room);
      /* Signature: FE FF for big endian, FF FE for little endian.  */
1842       if (big_endian)
1843         EMIT_TWO_BYTES (0xFE, 0xFF);
1844       else
1845         EMIT_TWO_BYTES (0xFF, 0xFE);
      /* Remember that the signature has been written.  */
1846       CODING_UTF_16_BOM (coding) = utf_without_bom;
1847     }
1848
1849   while (charbuf < charbuf_end)
1850     {
1851       ASSURE_DESTINATION (safe_room);
1852       c = *charbuf++;
1853       if (c > MAX_UNICODE_CHAR)
1854         c = coding->default_char;
1855
1856       if (c < 0x10000)
1857         {
          /* One 16-bit code unit.  */
1858           if (big_endian)
1859             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1860           else
1861             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1862         }
1863       else
1864         {
          /* Surrogate pair: C1 carries the bits above the low ten of
             C - 0x10000, C2 the low ten bits.  */
1865           int c1, c2;
1866
1867           c -= 0x10000;
1868           c1 = (c >> 10) + 0xD800;
1869           c2 = (c & 0x3FF) + 0xDC00;
1870           if (big_endian)
1871             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1872           else
1873             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1874         }
1875     }
1876   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1877   coding->produced = dst - coding->destination;
1878   coding->produced_char += produced_chars;
1879   return 0;
1880 }
1881
1882 \f
1883 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1884
1885 /* Emacs' internal format for representation of multiple character
1886    sets is a kind of multi-byte encoding, i.e. characters are
1887    represented by variable-length sequences of one-byte codes.
1888
1889    ASCII characters and control characters (e.g. `tab', `newline') are
1890    represented by one-byte sequences which are their ASCII codes, in
1891    the range 0x00 through 0x7F.
1892
1893    8-bit characters of the range 0x80..0x9F are represented by
1894    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1895    code + 0x20).
1896
1897    8-bit characters of the range 0xA0..0xFF are represented by
1898    one-byte sequences which are their 8-bit code.
1899
1900    The other characters are represented by a sequence of `base
1901    leading-code', optional `extended leading-code', and one or two
1902    `position-code's.  The length of the sequence is determined by the
1903    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1904    whereas extended leading-code and position-code take the range 0xA0
1905    through 0xFF.  See `charset.h' for more details about leading-code
1906    and position-code.
1907
1908    --- CODE RANGE of Emacs' internal format ---
1909    character set        range
1910    -------------        -----
1911    ascii                0x00..0x7F
1912    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1913    eight-bit-graphic    0xA0..0xFF
1914    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1915    ---------------------------------------------
1916
1917    As this is the internal character representation, the format is
1918    usually not used externally (i.e. in a file or in a data sent to a
1919    process).  But, it is possible to have a text externally in this
1920    format (i.e. by encoding by the coding system `emacs-mule').
1921
1922    In that case, a sequence of one-byte codes has a slightly different
1923    form.
1924
1925    At first, all characters in eight-bit-control are represented by
1926    one-byte sequences which are their 8-bit code.
1927
1928    Next, character composition data are represented by the byte
1929    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1930    where,
1931         METHOD is 0xF2 plus one of composition method (enum
1932         composition_method),
1933
1934         BYTES is 0xA0 plus a byte length of this composition data,
1935
1936         CHARS is 0xA0 plus a number of characters composed by this
1937         data,
1938
1939         COMPONENTs are characters of multibyte form or composition
1940         rules encoded by two bytes of ASCII codes.
1941
1942    In addition, for backward compatibility, the following formats are
1943    also recognized as composition data on decoding.
1944
1945    0x80 MSEQ ...
1946    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1947
1948    Here,
1949         MSEQ is a multibyte form but in this special format:
1950           ASCII: 0xA0 ASCII_CODE+0x80,
1951           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1952         RULE is a one byte code of the range 0xA0..0xF0 that
1953         represents a composition rule.
1954   */
1955
/* Table indexed by a byte value.  The code in this file reads
   emacs_mule_bytes[B] as the total number of bytes (1 to 4) of the
   emacs-mule sequence whose first byte is B.  The table is filled in
   elsewhere.  */
1956 char emacs_mule_bytes[256];
1957
1958
1959 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1960    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1961    else return 0.  */
1962
/* Examine the source text of CODING for the emacs-mule format and
   record the outcome in DETECT_INFO.  CATEGORY_MASK_EMACS_MULE is
   always added to DETECT_INFO->checked, and the first
   CODING->head_ascii bytes are skipped.

   Leaving the scan loop by `break' rejects the category and returns
   0.  That happens on ESC, SI or SO, on a composition run that
   consumed 4 bytes or fewer, or on a multibyte sequence that is cut
   short by a byte below 0xA0.

   At `no_more_source' the category is also rejected (return 0) if
   this is the last block and a sequence was only partly read.
   Otherwise FOUND is merged into DETECT_INFO->found and 1 is
   returned.  The label has no explicit goto here; presumably
   ONE_MORE_BYTE jumps to it when the source is exhausted -- confirm
   against the macro.  */
1963 static int
1964 detect_coding_emacs_mule (struct coding_system *coding,
1965                           struct coding_detection_info *detect_info)
1966 {
1967   const unsigned char *src = coding->source, *src_base;
1968   const unsigned char *src_end = coding->source + coding->src_bytes;
1969   int multibytep = coding->src_multibyte;
1970   int consumed_chars = 0;
1971   int c;
1972   int found = 0;
1973
1974   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1975   /* A coding system of this category is always ASCII compatible.  */
1976   src += coding->head_ascii;
1977
1978   while (1)
1979     {
1980       src_base = src;
1981       ONE_MORE_BYTE (c);
1982       if (c < 0)
1983         continue;
1984       if (c == 0x80)
1985         {
1986           /* Perhaps the start of composite character.  We simply skip
1987              it because analyzing it is too heavy for detecting.  But,
1988              at least, we check that the composite character
1989              consists of more than 4 bytes.  */
          /* This declaration shadows the outer SRC_BASE inside this
             block only.  */
1990           const unsigned char *src_base;
1991
1992         repeat:
1993           src_base = src;
          /* Skip the run of bytes >= 0xA0; C ends up holding the
             first byte that is not.  */
1994           do
1995             {
1996               ONE_MORE_BYTE (c);
1997             }
1998           while (c >= 0xA0);
1999
2000           if (src - src_base <= 4)
2001             break;
2002           found = CATEGORY_MASK_EMACS_MULE;
          /* Another composition starts right away.  */
2003           if (c == 0x80)
2004             goto repeat;
2005         }
2006
2007       if (c < 0x80)
2008         {
2009           if (c < 0x20
2010               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2011             break;
2012         }
2013       else
2014         {
          /* NOTE(review): after a composition was skipped above, the
             outer SRC_BASE still points at the 0x80 starter rather
             than at the byte now held in C -- confirm that indexing
             by *SRC_BASE (not C) is intended.  */
2015           int more_bytes = emacs_mule_bytes[*src_base] - 1;
2016
          /* Each remaining byte of the sequence must be >= 0xA0.  */
2017           while (more_bytes > 0)
2018             {
2019               ONE_MORE_BYTE (c);
2020               if (c < 0xA0)
2021                 {
2022                   src--;        /* Unread the last byte.  */
2023                   break;
2024                 }
2025               more_bytes--;
2026             }
2027           if (more_bytes != 0)
2028             break;
2029           found = CATEGORY_MASK_EMACS_MULE;
2030         }
2031     }
2032   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2033   return 0;
2034
2035  no_more_source:
  /* SRC_BASE < SRC means the source ended inside a sequence.  */
2036   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2037     {
2038       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2039       return 0;
2040     }
2041   detect_info->found |= found;
2042   return 1;
2043 }
2044
2045
2046 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2047    character.  If CMP_STATUS indicates that we must expect MSEQ or
2048    RULE described above, decode it and return the negative value of
2049    the decoded character or rule.  If an invalid byte is found, return
2050    -1.  If SRC is too short, return -2.  */
2051
/* Outputs of emacs_mule_char on a non-error return: *NBYTES receives
   the number of source bytes read (SRC - SRC_BASE) and *NCHARS the
   value of CONSUMED_CHARS (presumably advanced by ONE_MORE_BYTE --
   confirm).  When a character was decoded and ID is non-null, *ID
   receives the id of the charset used.  When a byte is handed back
   negated as a rule, *ID is not written.

   Error returns: -1 via `invalid_code', -2 via `no_more_source'; the
   output arguments are not written in either case.  The function
   aborts if emacs_mule_bytes yields a length other than 1 to 4.  */
2052 int
2053 emacs_mule_char (struct coding_system *coding, const unsigned char *src,
2054                  int *nbytes, int *nchars, int *id,
2055                  struct composition_status *cmp_status)
2056 {
2057   const unsigned char *src_end = coding->source + coding->src_bytes;
2058   const unsigned char *src_base = src;
2059   int multibytep = coding->src_multibyte;
2060   struct charset *charset;
2061   unsigned code;
2062   int c;
2063   int consumed_chars = 0;
2064   int mseq_found = 0;
2065
2066   ONE_MORE_BYTE (c);
2067   if (c < 0)
2068     {
      /* A negative value is returned negated, with
         emacs_mule_charset[0] as its charset; presumably
         ONE_MORE_BYTE reports a character that is not a raw byte this
         way -- confirm.  */
2069       c = -c;
2070       charset = emacs_mule_charset[0];
2071     }
2072   else
2073     {
2074       if (c >= 0xA0)
2075         {
          /* A first byte >= 0xA0 is accepted only inside an old-form
             composition.  */
2076           if (cmp_status->state != COMPOSING_NO
2077               && cmp_status->old_form)
2078             {
2079               if (cmp_status->state == COMPOSING_CHAR)
2080                 {
                  /* A character is expected: 0xA0 followed by byte X
                     stands for the code X - 0x80; any other first byte
                     B stands for the leading code B - 0x20.  */
2081                   if (c == 0xA0)
2082                     {
2083                       ONE_MORE_BYTE (c);
2084                       c -= 0x80;
2085                       if (c < 0)
2086                         goto invalid_code;
2087                     }
2088                   else
2089                     c -= 0x20;
2090                   mseq_found = 1;
2091                 }
2092               else
2093                 {
                  /* No character is expected in this state: hand the
                     byte back negated without decoding it.  */
2094                   *nbytes = src - src_base;
2095                   *nchars = consumed_chars;
2096                   return -c;
2097                 }
2098             }
2099           else
2100             goto invalid_code;
2101         }
2102
      /* C is now the first byte of the sequence; the table gives the
         total length of the sequence in bytes.  */
2103       switch (emacs_mule_bytes[c])
2104         {
2105         case 2:
          /* Leading code and one position code.  */
2106           if (! (charset = emacs_mule_charset[c]))
2107             goto invalid_code;
2108           ONE_MORE_BYTE (c);
2109           if (c < 0xA0)
2110             goto invalid_code;
2111           code = c & 0x7F;
2112           break;
2113
2114         case 3:
2115           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
2116               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
2117             {
              /* Private leading code, a byte selecting the charset,
                 and one position code.  */
2118               ONE_MORE_BYTE (c);
2119               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
2120                 goto invalid_code;
2121               ONE_MORE_BYTE (c);
2122               if (c < 0xA0)
2123                 goto invalid_code;
2124               code = c & 0x7F;
2125             }
2126           else
2127             {
              /* Leading code and two position codes.  */
2128               if (! (charset = emacs_mule_charset[c]))
2129                 goto invalid_code;
2130               ONE_MORE_BYTE (c);
2131               if (c < 0xA0)
2132                 goto invalid_code;
2133               code = (c & 0x7F) << 8;
2134               ONE_MORE_BYTE (c);
2135               if (c < 0xA0)
2136                 goto invalid_code;
2137               code |= c & 0x7F;
2138             }
2139           break;
2140
2141         case 4:
          /* First byte, a byte selecting the charset, and two
             position codes.  */
2142           ONE_MORE_BYTE (c);
2143           if (c < 0 || ! (charset = emacs_mule_charset[c]))
2144             goto invalid_code;
2145           ONE_MORE_BYTE (c);
2146           if (c < 0xA0)
2147             goto invalid_code;
2148           code = (c & 0x7F) << 8;
2149           ONE_MORE_BYTE (c);
2150           if (c < 0xA0)
2151             goto invalid_code;
2152           code |= c & 0x7F;
2153           break;
2154
2155         case 1:
          /* Single byte: ASCII or eight-bit charset.  */
2156           code = c;
2157           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
2158                                      ? charset_ascii : charset_eight_bit);
2159           break;
2160
2161         default:
2162           abort ();
2163         }
      /* Map (CHARSET, CODE) to a character in C; a negative result is
         treated as invalid.  */
2164       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, code, c);
2165       if (c < 0)
2166         goto invalid_code;
2167     }
2168   *nbytes = src - src_base;
2169   *nchars = consumed_chars;
2170   if (id)
2171     *id = charset->id;
  /* A character decoded from the old-form composition format is
     returned negated.  */
2172   return (mseq_found ? -c : c);
2173
2174  no_more_source:
2175   return -2;
2176
2177  invalid_code:
2178   return -1;
2179 }
2180
2181
2182 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2183
2184 /* Handle these composition sequences ('|': the end of header elements,
2185    BYTES and CHARS >= 0xA0):
2186
2187    (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2188    (2) altchar composition:  0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2189    (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2190
2191    and these old forms:
2192
2193    (4) relative composition: 0x80 | MSEQ ... MSEQ
2194    (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2195
2196    When the starter 0x80 and the following header elements are found,
2197    this annotation header is produced.
2198
2199         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2200
2201    NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2202    NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2203
2204    Then, upon reading the following elements, these codes are produced
2205    until the composition end is found:
2206
2207    (1) CHAR ... CHAR
2208    (2) ALT ... ALT CHAR ... CHAR
2209    (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2210    (4) CHAR ... CHAR
2211    (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2212
2213    When the composition end is found, LENGTH and NCHARS in the
2214    annotation header are updated as below:
2215
2216    (1) LENGTH: unchanged, NCHARS: unchanged
2217    (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2218    (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2219    (4) LENGTH: unchanged,  NCHARS: number of CHARs
2220    (5) LENGTH: unchanged,  NCHARS: number of CHARs
2221
2222    If an error is found while composing, the annotation header is
2223    changed to the original composition header (plus filler -1s) as
2224    below:
2225
2226    (1),(2),(3)  [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2227    (5)          [ 0x80 0xFF -1 -1 -1 ]
2228
2229    and the sequence [ -2 DECODED-RULE ] is changed to the original
2230    byte sequence as below:
2231         o the original byte sequence is B: [ B -1 ]
2232         o the original byte sequence is B1 B2: [ B1 B2 ]
2233
2234    Most of the routines are implemented by macros because many
2235    variables and labels in the caller decode_coding_emacs_mule must be
2236    accessible, and they are usually called just once (thus they don't
2237    increase the size of the compiled object).  */
2238
2239 /* Decode a composition rule represented by C as a component of
2240    composition sequence of Emacs 20 style.  Set RULE to the decoded
2241    rule. */
2242
/* This macro expands in the caller's scope: C is modified in place,
   and a value outside 0..80 (after subtracting 0xA0) jumps to the
   caller's `invalid_code' label.  */
2243 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule)  \
2244   do {                                                  \
2245     int gref, nref;                                     \
2246                                                         \
2247     c -= 0xA0;          /* remove the 0xA0 bias */      \
2248     if (c < 0 || c >= 81)                               \
2249       goto invalid_code;                                \
2250     gref = c / 9, nref = c % 9;                         \
2251     if (gref == 4) gref = 10; /* 4 is remapped to 10 */ \
2252     if (nref == 4) nref = 10;                           \
2253     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2254   } while (0)
2255
2256
2257 /* Decode a composition rule represented by C and the following byte
2258    at SRC as a component of composition sequence of Emacs 21 style.
2259    Set RULE to the decoded rule.  */
2260
/* This macro expands in the caller's scope: it reads one more byte
   into C with ONE_MORE_BYTE, and a reference point outside 0..80
   (after subtracting 0x20) jumps to the caller's `invalid_code'
   label.  */
2261 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule)  \
2262   do {                                                  \
2263     int gref, nref;                                     \
2264                                                         \
2265     gref = c - 0x20;      /* first byte: GREF */        \
2266     if (gref < 0 || gref >= 81)                         \
2267       goto invalid_code;                                \
2268     ONE_MORE_BYTE (c);                                  \
2269     nref = c - 0x20;      /* second byte: NREF */       \
2270     if (nref < 0 || nref >= 81)                         \
2271       goto invalid_code;                                \
2272     rule = COMPOSITION_ENCODE_RULE (gref, nref);        \
2273   } while (0)
2274
2275
2276 /* Start of Emacs 21 style format.  The first three bytes at SRC are
2277    (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is the
2278    byte length of this composition information, CHARS is the number of
2279    characters composed by this composition.  */
2280
/* This macro expands in the caller's scope and uses C, CHARBUF and
   CMP_STATUS from it.  On entry C holds the method byte.  The BYTES
   and CHARS bytes are then read with ONE_MORE_BYTE.  The sequence is
   rejected (jump to the caller's `invalid_code') if BYTES - 0xA0 is
   below 3, if the method is COMPOSITION_RELATIVE and BYTES - 0xA0 is
   not 4, or if CHARS - 0xA0 is outside
   1..MAX_COMPOSITION_COMPONENTS-1.  Otherwise CMP_STATUS is
   initialized and the header is added with ADD_COMPOSITION_DATA.  */
2281 #define DECODE_EMACS_MULE_21_COMPOSITION()                              \
2282   do {                                                                  \
2283     enum composition_method method = c - 0xF2;                          \
2284     int *charbuf_base = charbuf; /* not referenced directly below */    \
2285     int nbytes, nchars;                                                 \
2286                                                                         \
2287     ONE_MORE_BYTE (c);                                                  \
2288     if (c < 0)                                                          \
2289       goto invalid_code;                                                \
2290     nbytes = c - 0xA0;                                                  \
2291     if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4))  \
2292       goto invalid_code;                                                \
2293     ONE_MORE_BYTE (c);                                                  \
2294     nchars = c - 0xA0;                                                  \
2295     if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS)            \
2296       goto invalid_code;                                                \
2297     cmp_status->old_form = 0;                                           \
2298     cmp_status->method = method;                                        \
2299     if (method == COMPOSITION_RELATIVE)                                 \
2300       cmp_status->state = COMPOSING_CHAR;                               \
2301     else                                                                \
2302       cmp_status->state = COMPOSING_COMPONENT_CHAR;                     \
2303     cmp_status->length = MAX_ANNOTATION_LENGTH;                         \
2304     cmp_status->nchars = nchars;                                        \
2305     cmp_status->ncomps = nbytes - 4;                                    \
2306     ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method);             \
2307   } while (0)
2308
2309
2310 /* Start of Emacs 20 style format for relative composition.  */
2311
/* This macro expands in the caller's scope and uses CHARBUF and
   CMP_STATUS from it.  It marks CMP_STATUS as an old-form
   COMPOSITION_RELATIVE composition that expects a character next,
   with zero characters and components so far, and adds a header with
   NCHARS and NBYTES both 0.  */
2312 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION()             \
2313   do {                                                          \
2314     cmp_status->old_form = 1;                                   \
2315     cmp_status->method = COMPOSITION_RELATIVE;                  \
2316     cmp_status->state = COMPOSING_CHAR;                         \
2317     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2318     cmp_status->nchars = cmp_status->ncomps = 0;                \
2319     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2320   } while (0)
2321
2322
2323 /* Start of Emacs 20 style format for rule-base composition.  */
2324
/* This macro expands in the caller's scope and uses CHARBUF and
   CMP_STATUS from it.  It marks CMP_STATUS as an old-form
   COMPOSITION_WITH_RULE composition that expects a character next,
   with zero characters and components so far, and adds a header with
   NCHARS and NBYTES both 0.  */
2325 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION()             \
2326   do {                                                          \
2327     cmp_status->old_form = 1;                                   \
2328     cmp_status->method = COMPOSITION_WITH_RULE;                 \
2329     cmp_status->state = COMPOSING_CHAR;                         \
2330     cmp_status->length = MAX_ANNOTATION_LENGTH;                 \
2331     cmp_status->nchars = cmp_status->ncomps = 0;                \
2332     ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);   \
2333   } while (0)
2334
2335
/* Read the byte that follows a composition starter and dispatch on
   it.  This macro expands in the caller's scope and uses C and SRC
   from it.
   - 0xF2 + a method in COMPOSITION_RELATIVE ..
     COMPOSITION_WITH_RULE_ALTCHARS: start of the Emacs 21 style.
   - 0xA0..0xBF: old-form relative composition; SRC is rewound so that
     this byte is read again as the first component.
   - 0xFF: old-form rule-base composition.
   - a negative value, a byte below 0xA0, or any other byte: jump to
     the caller's `invalid_code' label.  */
2336 #define DECODE_EMACS_MULE_COMPOSITION_START()           \
2337   do {                                                  \
2338     const unsigned char *current_src = src;             \
2339                                                         \
2340     ONE_MORE_BYTE (c);                                  \
2341     if (c < 0)                                          \
2342       goto invalid_code;                                \
2343     if (c - 0xF2 >= COMPOSITION_RELATIVE                \
2344         && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)  \
2345       DECODE_EMACS_MULE_21_COMPOSITION ();              \
2346     else if (c < 0xA0)                                  \
2347       goto invalid_code;                                \
2348     else if (c < 0xC0)                                  \
2349       {                                                 \
2350         DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION ();   \
2351         /* Re-read C as a composition component.  */    \
2352         src = current_src;                              \
2353       }                                                 \
2354     else if (c == 0xFF)                                 \
2355       DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION ();     \
2356     else                                                \
2357       goto invalid_code;                                \
2358   } while (0)
2359
/* Close the composition currently being decoded.  Relies on `charbuf'
   and `cmp_status' being in scope where the macro is expanded.

   IDX addresses the element `cmp_status->length' slots before CHARBUF.
   For an old-form composition, the number of characters read so far is
   stored at IDX + 2.  Otherwise, when the method is greater than
   COMPOSITION_RELATIVE, the element at IDX is set to the value held at
   IDX + 2 minus the recorded length.  In every case the state is reset
   to COMPOSING_NO.

   NOTE(review): IDX is presumably the first element of the composition
   annotation written by ADD_COMPOSITION_DATA -- confirm against that
   macro, which is defined outside this part of the file.  */
2360 #define EMACS_MULE_COMPOSITION_END()                            \
2361   do {                                                          \
2362     int idx = - cmp_status->length;                             \
2363                                                                 \
2364     if (cmp_status->old_form)                                   \
2365       charbuf[idx + 2] = cmp_status->nchars;                    \
2366     else if (cmp_status->method > COMPOSITION_RELATIVE)         \
2367       charbuf[idx] = charbuf[idx + 2] - cmp_status->length;     \
2368     cmp_status->state = COMPOSING_NO;                           \
2369   } while (0)
2370
2371
/* Finish a composition that is still open in CMP_STATUS and whose
   CMP_STATUS->length elements end just before CHARBUF.

   If the composition is in the old form and at least one character has
   been read, the character count is stored at offset 2 from the start
   of those elements.  In addition, for COMPOSITION_WITH_RULE left in
   the COMPOSING_CHAR state, the last two elements are rewritten as a
   raw-byte character (the last rule value plus 0xA0) followed by -1.

   Otherwise the leading elements are overwritten: first the raw-byte
   character 0x80, then either 0xFF (as a raw-byte character), -3 and 0
   for COMPOSITION_WITH_RULE, or, for the other methods, the raw-byte
   characters 0xF2 + method and the values previously held at offsets 3
   and 2 (each plus 0xA0), followed by -1.

   In all cases CMP_STATUS->state is reset to COMPOSING_NO.  The return
   value (0, 1 or 4) is what EMACS_MULE_MAYBE_FINISH_COMPOSITION adds to
   the caller's character offset.  */
2372 static int
2373 emacs_mule_finish_composition (int *charbuf,
2374                                struct composition_status *cmp_status)
2375 {
       /* Negative index of the first element belonging to the open
          composition, relative to CHARBUF.  */
2376   int idx = - cmp_status->length;
2377   int new_chars;
2378
2379   if (cmp_status->old_form && cmp_status->nchars > 0)
2380     {
2381       charbuf[idx + 2] = cmp_status->nchars;
2382       new_chars = 0;
2383       if (cmp_status->method == COMPOSITION_WITH_RULE
2384           && cmp_status->state == COMPOSING_CHAR)
2385         {
2386           /* The last rule was invalid.  */
2387           int rule = charbuf[-1] + 0xA0;
2388
2389           charbuf[-2] = BYTE8_TO_CHAR (rule);
2390           charbuf[-1] = -1;
2391           new_chars = 1;
2392         }
2393     }
2394   else
2395     {
2396       charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2397
2398       if (cmp_status->method == COMPOSITION_WITH_RULE)
2399         {
2400           charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2401           charbuf[idx++] = -3;
2402           charbuf[idx++] = 0;
2403           new_chars = 1;
2404         }
2405       else
2406         {
               /* Both values must be fetched before the stores below,
                  because the stores overwrite the slots they are read
                  from.  */
2407           int nchars = charbuf[idx + 1] + 0xA0;
2408           int nbytes = charbuf[idx + 2] + 0xA0;
2409
2410           charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2411           charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2412           charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2413           charbuf[idx++] = -1;
2414           new_chars = 4;
2415         }
2416     }
2417   cmp_status->state = COMPOSING_NO;
2418   return new_chars;
2419 }
2420
/* If a composition is open (state other than COMPOSING_NO), finish it
   with emacs_mule_finish_composition and add the value it returns to
   `char_offset'.  Relies on `cmp_status', `charbuf' and `char_offset'
   being in scope where the macro is expanded.  */
2421 #define EMACS_MULE_MAYBE_FINISH_COMPOSITION()                             \
2422   do {                                                                    \
2423     if (cmp_status->state != COMPOSING_NO)                                \
2424       char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
2425   } while (0)
2426
2427
/* Decode emacs-mule encoded bytes from CODING->source, starting at
   offset CODING->consumed, into CODING->charbuf, starting at element
   CODING->charbuf_used.

   The loop stops when CHARBUF reaches CHARBUF_END (which leaves room
   for MAX_ANNOTATION_LENGTH * 3 more elements), when emacs_mule_char
   returns -2, or at the `no_more_source' label (presumably the target
   of ONE_MORE_BYTE when the source runs out -- that macro is defined
   outside this part of the file).

   A composition that is still open at the end is finished when
   CODING_MODE_LAST_BLOCK is set; otherwise its elements are removed
   from the output and saved in CMP_STATUS->carryover, from where they
   are copied back at the start of the function.

   A byte sequence that cannot be decoded is handled at `invalid_code':
   decoding restarts at SRC_BASE, one byte is stored as an ASCII or
   raw-byte character, and CODING->errors is incremented.

   On return CODING->consumed, CODING->consumed_char and
   CODING->charbuf_used reflect the work done.  */
2428 static void
2429 decode_coding_emacs_mule (struct coding_system *coding)
2430 {
2431   const unsigned char *src = coding->source + coding->consumed;
2432   const unsigned char *src_end = coding->source + coding->src_bytes;
2433   const unsigned char *src_base;
2434   int *charbuf = coding->charbuf + coding->charbuf_used;
2435   /* We may produce two annotations (charset and composition) in one
2436      loop and one more charset annotation at the end.  */
2437   int *charbuf_end
2438     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
2439   int consumed_chars = 0, consumed_chars_base;
2440   int multibytep = coding->src_multibyte;
2441   Lisp_Object attrs, charset_list;
2442   int char_offset = coding->produced_char;
2443   int last_offset = char_offset;
2444   int last_id = charset_ascii;
2445   int eol_crlf =
2446     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2447   int byte_after_cr = -1;
2448   struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2449
2450   CODING_GET_INFO (coding, attrs, charset_list);
2451
       /* A composition was left open earlier: put the elements saved
          in the carryover area back at the head of the output.  */
2452   if (cmp_status->state != COMPOSING_NO)
2453     {
2454       int i;
2455
2456       for (i = 0; i < cmp_status->length; i++)
2457         *charbuf++ = cmp_status->carryover[i];
2458       coding->annotated = 1;
2459     }
2460
2461   while (1)
2462     {
2463       int c, id;
2464
           /* Remember where this iteration started; SRC_BASE and
              CONSUMED_CHARS_BASE are what is reported as consumed when
              the loop is left.  */
2465       src_base = src;
2466       consumed_chars_base = consumed_chars;
2467
2468       if (charbuf >= charbuf_end)
2469         {
               /* The byte read ahead after a CR has not been decoded
                  yet, so do not count it as consumed.  */
2470           if (byte_after_cr >= 0)
2471             src_base--;
2472           break;
2473         }
2474
2475       if (byte_after_cr >= 0)
2476         c = byte_after_cr, byte_after_cr = -1;
2477       else
2478         ONE_MORE_BYTE (c);
2479
           /* A negative C is stored negated as one character; 0x80
              starts a new composition.  Either way an open composition
              is finished first.  */
2480       if (c < 0 || c == 0x80)
2481         {
2482           EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2483           if (c < 0)
2484             {
2485               *charbuf++ = -c;
2486               char_offset++;
2487             }
2488           else
2489             DECODE_EMACS_MULE_COMPOSITION_START ();
2490           continue;
2491         }
2492
2493       if (c < 0x80)
2494         {
2495           if (eol_crlf && c == '\r')
2496             ONE_MORE_BYTE (byte_after_cr);
2497           id = charset_ascii;
2498           if (cmp_status->state != COMPOSING_NO)
2499             {
2500               if (cmp_status->old_form)
2501                 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2502               else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2503                 cmp_status->ncomps--;
2504             }
2505         }
2506       else
2507         {
2508           int nchars, nbytes;
2509           /* emacs_mule_char can load a charset map from a file, which
2510              allocates a large structure and might cause buffer text
2511              to be relocated as result.  Thus, we need to remember the
2512              original pointer to buffer text, and fixup all related
2513              pointers after the call.  */
2514           const unsigned char *orig = coding->source;
2515           EMACS_INT offset;
2516
2517           c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2518                                cmp_status);
2519           offset = coding->source - orig;
2520           if (offset)
2521             {
2522               src += offset;
2523               src_base += offset;
2524               src_end += offset;
2525             }
2526           if (c < 0)
2527             {
2528               if (c == -1)
2529                 goto invalid_code;
2530               if (c == -2)
2531                 break;
2532             }
2533           src = src_base + nbytes;
2534           consumed_chars = consumed_chars_base + nchars;
2535           if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2536             cmp_status->ncomps -= nchars;
2537         }
2538
2539       /* Now if C >= 0, we found a normally encoded character, if C <
2540          0, we found an old-style composition component character or
2541          rule.  */
2542
2543       if (cmp_status->state == COMPOSING_NO)
2544         {
               /* When the charset changes, emit a charset annotation
                  for the run of characters that just ended, unless
                  that run was ASCII.  */
2545           if (last_id != id)
2546             {
2547               if (last_id != charset_ascii)
2548                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2549                                   last_id);
2550               last_id = id;
2551               last_offset = char_offset;
2552             }
2553           *charbuf++ = c;
2554           char_offset++;
2555         }
2556       else if (cmp_status->state == COMPOSING_CHAR)
2557         {
2558           if (cmp_status->old_form)
2559             {
2560               if (c >= 0)
2561                 {
2562                   EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2563                   *charbuf++ = c;
2564                   char_offset++;
2565                 }
2566               else
2567                 {
2568                   *charbuf++ = -c;
2569                   cmp_status->nchars++;
2570                   cmp_status->length++;
2571                   if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2572                     EMACS_MULE_COMPOSITION_END ();
2573                   else if (cmp_status->method == COMPOSITION_WITH_RULE)
2574                     cmp_status->state = COMPOSING_RULE;
2575                 }
2576             }
2577           else
2578             {
2579               *charbuf++ = c;
2580               cmp_status->length++;
2581               cmp_status->nchars--;
2582               if (cmp_status->nchars == 0)
2583                 EMACS_MULE_COMPOSITION_END ();
2584             }
2585         }
2586       else if (cmp_status->state == COMPOSING_RULE)
2587         {
2588           int rule;
2589
2590           if (c >= 0)
2591             {
2592               EMACS_MULE_COMPOSITION_END ();
2593               *charbuf++ = c;
2594               char_offset++;
2595             }
2596           else
2597             {
2598               c = -c;
2599               DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2600               if (rule < 0)
2601                 goto invalid_code;
2602               *charbuf++ = -2;
2603               *charbuf++ = rule;
2604               cmp_status->length += 2;
2605               cmp_status->state = COMPOSING_CHAR;
2606             }
2607         }
2608       else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2609         {
2610           *charbuf++ = c;
2611           cmp_status->length++;
2612           if (cmp_status->ncomps == 0)
2613             cmp_status->state = COMPOSING_CHAR;
2614           else if (cmp_status->ncomps > 0)
2615             {
2616               if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2617                 cmp_status->state = COMPOSING_COMPONENT_RULE;
2618             }
2619           else
2620             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2621         }
2622       else                      /* COMPOSING_COMPONENT_RULE */
2623         {
2624           int rule;
2625
2626           DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2627           if (rule < 0)
2628             goto invalid_code;
2629           *charbuf++ = -2;
2630           *charbuf++ = rule;
2631           cmp_status->length += 2;
2632           cmp_status->ncomps--;
2633           if (cmp_status->ncomps > 0)
2634             cmp_status->state = COMPOSING_COMPONENT_CHAR;
2635           else
2636             EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2637         }
2638       continue;
2639
           /* NOTE(review): no statement visible in this function jumps
              to `retry'; check whether a macro expanded above uses it
              before removing it.  */
2640     retry:
2641       src = src_base;
2642       consumed_chars = consumed_chars_base;
2643       continue;
2644
           /* Give up on the sequence that started at SRC_BASE: close
              any open composition, re-read a single byte and store it
              as an ASCII or raw-byte character.  */
2645     invalid_code:
2646       EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2647       src = src_base;
2648       consumed_chars = consumed_chars_base;
2649       ONE_MORE_BYTE (c);
2650       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2651       char_offset++;
2652       coding->errors++;
2653     }
2654
2655  no_more_source:
2656   if (cmp_status->state != COMPOSING_NO)
2657     {
2658       if (coding->mode & CODING_MODE_LAST_BLOCK)
2659         EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2660       else
2661         {
2662           int i;
2663
               /* Take the unfinished composition out of the output and
                  keep it in the carryover area.  */
2664           charbuf -= cmp_status->length;
2665           for (i = 0; i < cmp_status->length; i++)
2666             cmp_status->carryover[i] = charbuf[i];
2667         }
2668     }
2669   if (last_id != charset_ascii)
2670     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2671   coding->consumed_char += consumed_chars_base;
2672   coding->consumed = src_base - coding->source;
2673   coding->charbuf_used = charbuf - coding->charbuf;
2674 }
2675
2676
/* Store in CODES, an array of at least two elements, the leading
   code(s) for the emacs-mule charset ID:

     ID < 0xA0          CODES[0] = ID,   CODES[1] = 0
     0xA0 <= ID < 0xE0  CODES[0] = 0x9A, CODES[1] = ID
     0xE0 <= ID < 0xF0  CODES[0] = 0x9B, CODES[1] = ID
     0xF0 <= ID < 0xF5  CODES[0] = 0x9C, CODES[1] = ID
     otherwise          CODES[0] = 0x9D, CODES[1] = ID

   A zero CODES[1] means that there is no second leading code.

   The expansion deliberately has no trailing semicolon, so the macro
   behaves as a single statement and can be used in an unbraced
   `if ... else'.  Both arguments are parenthesized so that expressions
   may be passed; ID is evaluated more than once, so it must not have
   side effects.  */
#define EMACS_MULE_LEADING_CODES(id, codes)	\
  do {						\
    if ((id) < 0xA0)				\
      (codes)[0] = (id), (codes)[1] = 0;	\
    else if ((id) < 0xE0)			\
      (codes)[0] = 0x9A, (codes)[1] = (id);	\
    else if ((id) < 0xF0)			\
      (codes)[0] = 0x9B, (codes)[1] = (id);	\
    else if ((id) < 0xF5)			\
      (codes)[0] = 0x9C, (codes)[1] = (id);	\
    else					\
      (codes)[0] = 0x9D, (codes)[1] = (id);	\
  } while (0)
2690
2691
/* Encode the first CODING->charbuf_used elements of CODING->charbuf
   as emacs-mule bytes, written to CODING->destination starting at
   offset CODING->produced.

   A negative element introduces an annotation.  A charset annotation
   selects a preferred charset, which is dropped again unless its ID is
   a member of the charset list; a composition annotation is skipped
   (not yet implemented); any other annotation aborts.

   An ASCII character is emitted as one byte and a raw-byte character
   as the byte it stands for.  Any other character is emitted as the
   leading code(s) of its charset followed by one code byte (dimension
   1) or two code bytes (otherwise) with the high bit set.  A character
   for which no charset is found is replaced by CODING->default_char.

   The coding system's charset list is forced to
   Vemacs_mule_charset_list.  At the end CODING_RESULT_SUCCESS is
   recorded and CODING->produced_char and CODING->produced are updated.
   The only return statement visible here returns 0.  */
2692 static int
2693 encode_coding_emacs_mule (struct coding_system *coding)
2694 {
2695   int multibytep = coding->dst_multibyte;
2696   int *charbuf = coding->charbuf;
2697   int *charbuf_end = charbuf + coding->charbuf_used;
2698   unsigned char *dst = coding->destination + coding->produced;
2699   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2700   int safe_room = 8;
2701   int produced_chars = 0;
2702   Lisp_Object attrs, charset_list;
2703   int c;
2704   int preferred_charset_id = -1;
2705
2706   CODING_GET_INFO (coding, attrs, charset_list);
2707   if (! EQ (charset_list, Vemacs_mule_charset_list))
2708     {
2709       CODING_ATTR_CHARSET_LIST (attrs)
2710         = charset_list = Vemacs_mule_charset_list;
2711     }
2712
2713   while (charbuf < charbuf_end)
2714     {
2715       ASSURE_DESTINATION (safe_room);
2716       c = *charbuf++;
2717
2718       if (c < 0)
2719         {
2720           /* Handle an annotation.  */
2721           switch (*charbuf)
2722             {
2723             case CODING_ANNOTATE_COMPOSITION_MASK:
2724               /* Not yet implemented.  */
2725               break;
2726             case CODING_ANNOTATE_CHARSET_MASK:
                   /* NOTE(review): CHARBUF has already been advanced
                      past the element holding C, and the skip below
                      shows that only -C - 1 elements of the annotation
                      remain from here.  Index 3 is therefore inside the
                      annotation only if -C is at least 5 -- confirm
                      against the layout written by ADD_CHARSET_DATA,
                      which is defined outside this part of the file.  */
2727               preferred_charset_id = charbuf[3];
2728               if (preferred_charset_id >= 0
2729                   && NILP (Fmemq (make_number (preferred_charset_id),
2730                                   charset_list)))
2731                 preferred_charset_id = -1;
2732               break;
2733             default:
2734               abort ();
2735             }
               /* Skip the rest of the annotation.  */
2736           charbuf += -c - 1;
2737           continue;
2738         }
2739
2740       if (ASCII_CHAR_P (c))
2741         EMIT_ONE_ASCII_BYTE (c);
2742       else if (CHAR_BYTE8_P (c))
2743         {
2744           c = CHAR_TO_BYTE8 (c);
2745           EMIT_ONE_BYTE (c);
2746         }
2747       else
2748         {
2749           struct charset *charset;
2750           unsigned code;
2751           int dimension;
2752           int emacs_mule_id;
2753           unsigned char leading_codes[2];
2754
               /* Try the preferred charset first and fall back to a
                  search of the whole charset list.  */
2755           if (preferred_charset_id >= 0)
2756             {
2757               charset = CHARSET_FROM_ID (preferred_charset_id);
2758               if (CHAR_CHARSET_P (c, charset))
2759                 code = ENCODE_CHAR (charset, c);
2760               else
2761                 charset = char_charset (c, charset_list, &code);
2762             }
2763           else
2764             charset = char_charset (c, charset_list, &code);
2765           if (! charset)
2766             {
2767               c = coding->default_char;
2768               if (ASCII_CHAR_P (c))
2769                 {
2770                   EMIT_ONE_ASCII_BYTE (c);
2771                   continue;
2772                 }
                   /* NOTE(review): this second lookup is not checked
                      for a null result before CHARSET is used below --
                      confirm that the default character always belongs
                      to a charset in the list.  */
2773               charset = char_charset (c, charset_list, &code);
2774             }
2775           dimension = CHARSET_DIMENSION (charset);
2776           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2777           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2778           EMIT_ONE_BYTE (leading_codes[0]);
2779           if (leading_codes[1])
2780             EMIT_ONE_BYTE (leading_codes[1]);
2781           if (dimension == 1)
2782             EMIT_ONE_BYTE (code | 0x80);
2783           else
2784             {
2785               code |= 0x8080;
2786               EMIT_ONE_BYTE (code >> 8);
2787               EMIT_ONE_BYTE (code & 0xFF);
2788             }
2789         }
2790     }
2791   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2792   coding->produced_char += produced_chars;
2793   coding->produced = dst - coding->destination;
2794   return 0;
2795 }
2796
2797 \f
2798 /*** 7. ISO2022 handlers ***/
2799
2800 /* The following note describes the coding system ISO2022 briefly.
2801    Since the intention of this note is to help understand the
2802    functions in this file, some parts are NOT ACCURATE or are OVERLY
2803    SIMPLIFIED.  For thorough understanding, please refer to the
2804    original document of ISO2022.  This is equivalent to the standard
2805    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2806
2807    ISO2022 provides many mechanisms to encode several character sets
2808    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2809    is encoded using bytes less than 128.  This may make the encoded
2810    text a little bit longer, but the text passes more easily through
2811    several types of gateway, some of which strip off the MSB (Most
2812    Significant Bit).
2813
2814    There are two kinds of character sets: control character sets and
2815    graphic character sets.  The former contain control characters such
2816    as `newline' and `escape' to provide control functions (control
2817    functions are also provided by escape sequences).  The latter
2818    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2819    two control character sets and many graphic character sets.
2820
2821    Graphic character sets are classified into one of the following
2822    four classes, according to the number of bytes (DIMENSION) and
2823    number of characters in one dimension (CHARS) of the set:
2824    - DIMENSION1_CHARS94
2825    - DIMENSION1_CHARS96
2826    - DIMENSION2_CHARS94
2827    - DIMENSION2_CHARS96
2828
2829    In addition, each character set is assigned an identification tag,
2830    unique for each set, called the "final character" (denoted as <F>
2831    hereafter).  The <F> of each character set is decided by ECMA(*)
2832    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2833    (0x30..0x3F are for private use only).
2834
2835    Note (*): ECMA = European Computer Manufacturers Association
2836
2837    Here are examples of graphic character sets [NAME(<F>)]:
2838         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2839         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2840         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2841         o DIMENSION2_CHARS96 -- none for the moment
2842
2843    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2844         C0 [0x00..0x1F] -- control character plane 0
2845         GL [0x20..0x7F] -- graphic character plane 0
2846         C1 [0x80..0x9F] -- control character plane 1
2847         GR [0xA0..0xFF] -- graphic character plane 1
2848
2849    A control character set is directly designated and invoked to C0 or
2850    C1 by an escape sequence.  The most common case is that:
2851    - ISO646's  control character set is designated/invoked to C0, and
2852    - ISO6429's control character set is designated/invoked to C1,
2853    and usually these designations/invocations are omitted in encoded
2854    text.  In a 7-bit environment, only C0 can be used, and a control
2855    character for C1 is encoded by an appropriate escape sequence to
2856    fit into the environment.  All control characters for C1 are
2857    defined to have corresponding escape sequences.
2858
2859    A graphic character set is at first designated to one of four
2860    graphic registers (G0 through G3), then these graphic registers are
2861    invoked to GL or GR.  These designations and invocations can be
2862    done independently.  The most common case is that G0 is invoked to
2863    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2864    these invocations and designations are omitted in encoded text.
2865    In a 7-bit environment, only GL can be used.
2866
2867    When a graphic character set of CHARS94 is invoked to GL, codes
2868    0x20 and 0x7F of the GL area work as control characters SPACE and
2869    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2870    be used.
2871
2872    There are two ways of invocation: locking-shift and single-shift.
2873    With locking-shift, the invocation lasts until the next different
2874    invocation, whereas with single-shift, the invocation affects the
2875    following character only and doesn't affect the locking-shift
2876    state.  Invocations are done by the following control characters or
2877    escape sequences:
2878
2879    ----------------------------------------------------------------------
2880    abbrev  function                  cntrl escape seq   description
2881    ----------------------------------------------------------------------
2882    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2883    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2884    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2885    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2886    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2887    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2888    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2889    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2890    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2891    ----------------------------------------------------------------------
2892    (*) These are not used by any known coding system.
2893
2894    Control characters for these functions are defined by macros
2895    ISO_CODE_XXX in `coding.h'.
2896
2897    Designations are done by the following escape sequences:
2898    ----------------------------------------------------------------------
2899    escape sequence      description
2900    ----------------------------------------------------------------------
2901    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2902    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2903    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2904    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2905    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2906    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2907    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2908    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2909    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2910    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2911    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2912    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2913    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2914    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2915    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2916    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2917    ----------------------------------------------------------------------
2918
2919    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2920    of dimension 1, chars 94, and final character <F>, etc...
2921
2922    Note (*): Although these designations are not allowed in ISO2022,
2923    Emacs accepts them on decoding, and produces them on encoding
2924    CHARS96 character sets in a coding system which is characterized as
2925    7-bit environment, non-locking-shift, and non-single-shift.
2926
2927    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2928    '(' must be omitted.  We refer to this as "short-form" hereafter.
2929
2930    Now you may notice that there are a lot of ways of encoding the
2931    same multilingual text in ISO2022.  Actually, there exist many
2932    coding systems such as Compound Text (used in X11's inter client
2933    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2934    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2935    localized platforms), and all of these are variants of ISO2022.
2936
2937    In addition to the above, Emacs handles two more kinds of escape
2938    sequences: ISO6429's direction specification and Emacs' private
2939    sequence for specifying character composition.
2940
2941    ISO6429's direction specification takes the following form:
2942         o CSI ']'      -- end of the current direction
2943         o CSI '0' ']'  -- end of the current direction
2944         o CSI '1' ']'  -- start of left-to-right text
2945         o CSI '2' ']'  -- start of right-to-left text
2946    The control character CSI (0x9B: control sequence introducer) is
2947    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2948
2949    Character composition specification takes the following form:
2950         o ESC '0' -- start relative composition
2951         o ESC '1' -- end composition
2952         o ESC '2' -- start rule-base composition (*)
2953         o ESC '3' -- start relative composition with alternate chars  (**)
2954         o ESC '4' -- start rule-base composition with alternate chars  (**)
2955   Since these are not standard escape sequences of any ISO standard,
2956   the use of them with these meanings is restricted to Emacs only.
2957
2958   (*) This form is used only in Emacs 20.7 and older versions,
2959   but newer versions can safely decode it.
2960   (**) This form is used only in Emacs 21.1 and newer versions,
2961   and older versions can't decode it.
2962
2963   Here's a list of example usages of these composition escape
2964   sequences (categorized by `enum composition_method').
2965
2966   COMPOSITION_RELATIVE:
2967         ESC 0 CHAR [ CHAR ] ESC 1
2968   COMPOSITION_WITH_RULE:
2969         ESC 2 CHAR [ RULE CHAR ] ESC 1
2970   COMPOSITION_WITH_ALTCHARS:
2971         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2972   COMPOSITION_WITH_RULE_ALTCHARS:
2973         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2974
/* Table of 256 `enum iso_code_class_type' values.
   NOTE(review): presumably indexed by byte value to classify each byte
   for the ISO2022 handlers; it is not referenced in this part of the
   file -- confirm against its users.  */
2975 enum iso_code_class_type iso_code_class[256];
2976
/* Return nonzero if the charset whose ID is ID is safe for CODING,
   i.e. ID is within 0 .. CODING->max_charset_id and the entry for ID
   in CODING->safe_charsets is not 255.

   A negative ID is reported as not safe instead of indexing before the
   start of the table; callers may pass a charset-table value without
   checking its sign first.  ID is evaluated more than once, so it must
   not have side effects.  */
#define SAFE_CHARSET_P(coding, id)		\
  ((id) >= 0					\
   && (id) <= (coding)->max_charset_id		\
   && (coding)->safe_charsets[(id)] != 255)
2980
2981
/* Nonzero if CODING_ISO_INITIAL for register 1 of the coding system
   stored in `coding_categories[CATEGORY]' is non-negative.
   NOTE(review): presumably this means a charset is initially
   designated to G1, so that a shift-out is meaningful for that
   category -- confirm against the definition of CODING_ISO_INITIAL.  */
2982 #define SHIFT_OUT_OK(category)  \
2983   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2984
2985 static void
2986 setup_iso_safe_charsets (Lisp_Object attrs)
2987 {
2988   Lisp_Object charset_list, safe_charsets;
2989   Lisp_Object request;
2990   Lisp_Object reg_usage;
2991   Lisp_Object tail;
2992   int reg94, reg96;
2993   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2994   int max_charset_id;
2995
2996   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2997   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2998       && ! EQ (charset_list, Viso_2022_charset_list))
2999     {
3000       CODING_ATTR_CHARSET_LIST (attrs)
3001         = charset_list = Viso_2022_charset_list;
3002       ASET (attrs, coding_attr_safe_charsets, Qnil);
3003     }
3004
3005   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
3006     return;
3007
3008   max_charset_id = 0;
3009   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3010     {
3011       int id = XINT (XCAR (tail));
3012       if (max_charset_id < id)
3013         max_charset_id = id;
3014     }
3015
3016   safe_charsets = make_uninit_string (max_charset_id + 1);
3017   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
3018   request = AREF (attrs, coding_attr_iso_request);
3019   reg_usage = AREF (attrs, coding_attr_iso_usage);
3020   reg94 = XINT (XCAR (reg_usage));
3021   reg96 = XINT (XCDR (reg_usage));
3022
3023   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
3024     {
3025       Lisp_Object id;
3026       Lisp_Object reg;
3027       struct charset *charset;
3028
3029       id = XCAR (tail);
3030       charset = CHARSET_FROM_ID (XINT (id));
3031       reg = Fcdr (Fassq (id, request));
3032       if (! NILP (reg))
3033         SSET (safe_charsets, XINT (id), XINT (reg));
3034       else if (charset->iso_chars_96)
3035         {
3036           if (reg96 < 4)
3037             SSET (safe_charsets, XINT (id), reg96);
3038         }
3039       else
3040         {
3041           if (reg94 < 4)
3042             SSET (safe_charsets, XINT (id), reg94);
3043         }
3044     }
3045   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
3046 }
3047
3048
3049 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3050    Check if a text is encoded in one of ISO-2022 based coding systems.
3051    If it is, return 1, else return 0.  */
3052
3053 static int
3054 detect_coding_iso_2022 (struct coding_system *coding,
3055                         struct coding_detection_info *detect_info)
3056 {
3057   const unsigned char *src = coding->source, *src_base = src;
3058   const unsigned char *src_end = coding->source + coding->src_bytes;
3059   int multibytep = coding->src_multibyte;
3060   int single_shifting = 0;
3061   int id;
3062   int c, c1;
3063   int consumed_chars = 0;
3064   int i;
3065   int rejected = 0;
3066   int found = 0;
3067   int composition_count = -1;
3068
3069   detect_info->checked |= CATEGORY_MASK_ISO;
3070
3071   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
3072     {
3073       struct coding_system *this = &(coding_categories[i]);
3074       Lisp_Object attrs, val;
3075
3076       if (this->id < 0)
3077         continue;
3078       attrs = CODING_ID_ATTRS (this->id);
3079       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
3080           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Viso_2022_charset_list))
3081         setup_iso_safe_charsets (attrs);
3082       val = CODING_ATTR_SAFE_CHARSETS (attrs);
3083       this->max_charset_id = SCHARS (val) - 1;
3084       this->safe_charsets = SDATA (val);
3085     }
3086
3087   /* A coding system of this category is always ASCII compatible.  */
3088   src += coding->head_ascii;
3089
3090   while (rejected != CATEGORY_MASK_ISO)
3091     {
3092       src_base = src;
3093       ONE_MORE_BYTE (c);
3094       switch (c)
3095         {
3096         case ISO_CODE_ESC:
3097           if (inhibit_iso_escape_detection)
3098             break;
3099           single_shifting = 0;
3100           ONE_MORE_BYTE (c);
3101           if (c >= '(' && c <= '/')
3102             {
3103               /* Designation sequence for a charset of dimension 1.  */
3104               ONE_MORE_BYTE (c1);
3105               if (c1 < ' ' || c1 >= 0x80
3106                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
3107                 /* Invalid designation sequence.  Just ignore.  */
3108                 break;
3109             }
3110           else if (c == '$')
3111             {
3112               /* Designation sequence for a charset of dimension 2.  */
3113               ONE_MORE_BYTE (c);
3114               if (c >= '@' && c <= 'B')
3115                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
3116                 id = iso_charset_table[1][0][c];
3117               else if (c >= '(' && c <= '/')
3118                 {
3119                   ONE_MORE_BYTE (c1);
3120                   if (c1 < ' ' || c1 >= 0x80
3121                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
3122                     /* Invalid designation sequence.  Just ignore.  */
3123                     break;
3124                 }
3125               else
3126                 /* Invalid designation sequence.  Just ignore it.  */
3127                 break;
3128             }
3129           else if (c == 'N' || c == 'O')
3130             {
3131               /* ESC <Fe> for SS2 or SS3.  */
3132               single_shifting = 1;
3133               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3134               break;
3135             }
3136           else if (c == '1')
3137             {
3138               /* End of composition.  */
3139               if (composition_count < 0
3140                   || composition_count > MAX_COMPOSITION_COMPONENTS)
3141                 /* Invalid */
3142                 break;
3143               composition_count = -1;
3144               found |= CATEGORY_MASK_ISO;
3145             }
3146           else if (c >= '0' && c <= '4')
3147             {
3148               /* ESC <Fp> for start/end composition.  */
3149               composition_count = 0;
3150               break;
3151             }
3152           else
3153             {
3154               /* Invalid escape sequence.  Just ignore it.  */
3155               break;
3156             }
3157
3158           /* We found a valid designation sequence for CHARSET.  */
3159           rejected |= CATEGORY_MASK_ISO_8BIT;
3160           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
3161                               id))
3162             found |= CATEGORY_MASK_ISO_7;
3163           else
3164             rejected |= CATEGORY_MASK_ISO_7;
3165           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
3166                               id))
3167             found |= CATEGORY_MASK_ISO_7_TIGHT;
3168           else
3169             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
3170           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
3171                               id))
3172             found |= CATEGORY_MASK_ISO_7_ELSE;
3173           else
3174             rejected |= CATEGORY_MASK_ISO_7_ELSE;
3175           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
3176                               id))
3177             found |= CATEGORY_MASK_ISO_8_ELSE;
3178           else
3179             rejected |= CATEGORY_MASK_ISO_8_ELSE;
3180           break;
3181
3182         case ISO_CODE_SO:
3183         case ISO_CODE_SI:
3184           /* Locking shift out/in.  */
3185           if (inhibit_iso_escape_detection)
3186             break;
3187           single_shifting = 0;
3188           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
3189           break;
3190
3191         case ISO_CODE_CSI:
3192           /* Control sequence introducer.  */
3193           single_shifting = 0;
3194           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3195           found |= CATEGORY_MASK_ISO_8_ELSE;
3196           goto check_extra_latin;
3197
3198         case ISO_CODE_SS2:
3199         case ISO_CODE_SS3:
3200           /* Single shift.   */
3201           if (inhibit_iso_escape_detection)
3202             break;
3203           single_shifting = 0;
3204           rejected |= CATEGORY_MASK_ISO_7BIT;
3205           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3206               & CODING_ISO_FLAG_SINGLE_SHIFT)
3207             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
3208           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
3209               & CODING_ISO_FLAG_SINGLE_SHIFT)
3210             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
3211           if (single_shifting)
3212             break;
3213           goto check_extra_latin;
3214
3215         default:
3216           if (c < 0)
3217             continue;
3218           if (c < 0x80)
3219             {
3220               if (composition_count >= 0)
3221                 composition_count++;
3222               single_shifting = 0;
3223               break;
3224             }
3225           if (c >= 0xA0)
3226             {
3227               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
3228               found |= CATEGORY_MASK_ISO_8_1;
3229               /* Check the length of succeeding codes of the range
3230                  0xA0..0FF.  If the byte length is even, we include
3231                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
3232                  only when we are not single shifting.  */
3233               if (! single_shifting
3234                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
3235                 {
3236                   int i = 1;
3237                   while (src < src_end)
3238                     {
3239                       src_base = src;
3240                       ONE_MORE_BYTE (c);
3241                       if (c < 0xA0)
3242                         {
3243                           src = src_base;
3244                           break;
3245                         }
3246                       i++;
3247                     }
3248
3249                   if (i & 1 && src < src_end)
3250                     {
3251                       rejected |= CATEGORY_MASK_ISO_8_2;
3252                       if (composition_count >= 0)
3253                         composition_count += i;
3254                     }
3255                   else
3256                     {
3257                       found |= CATEGORY_MASK_ISO_8_2;
3258                       if (composition_count >= 0)
3259                         composition_count += i / 2;
3260                     }
3261                 }
3262               break;
3263             }
3264         check_extra_latin:
3265           single_shifting = 0;
3266           if (! VECTORP (Vlatin_extra_code_table)
3267               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3268             {
3269               rejected = CATEGORY_MASK_ISO;
3270               break;
3271             }
3272           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
3273               & CODING_ISO_FLAG_LATIN_EXTRA)
3274             found |= CATEGORY_MASK_ISO_8_1;
3275           else
3276             rejected |= CATEGORY_MASK_ISO_8_1;
3277           rejected |= CATEGORY_MASK_ISO_8_2;
3278         }
3279     }
3280   detect_info->rejected |= CATEGORY_MASK_ISO;
3281   return 0;
3282
3283  no_more_source:
3284   detect_info->rejected |= rejected;
3285   detect_info->found |= (found & ~rejected);
3286   return 1;
3287 }
3288
3289
3290 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
3291    escape sequence should be kept.

        REG selects the designation slot CODING_ISO_DESIGNATION (coding,
        REG); DIM, CHARS_96 and FINAL are the keys handed to
        ISO_CHARSET_TABLE to look up the charset ID.  The macro uses
        `coding' from the enclosing scope.  CHARS_96 must be an lvalue
        because it doubles as the output flag, and FINAL is expanded
        more than once.

        When FINAL is outside '0'..127, the table lookup is negative, or
        SAFE_CHARSET_P rejects the charset for CODING, the slot is set
        to -2, CHARS_96 is set to -1, and nothing else is done.

        Otherwise the charset ID is stored in the slot, after replacing
        charset_jisx0201_roman by charset_ascii when
        CODING_ISO_FLAG_USE_ROMAN is set, and charset_jisx0208_1978 by
        charset_jisx0208 when CODING_ISO_FLAG_USE_OLDJIS is set.  */
3292 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
3293   do {                                                                  \
3294     int id, prev;                                                       \
3295                                                                         \
3296     if (final < '0' || final >= 128                                     \
3297         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
3298         || !SAFE_CHARSET_P (coding, id))                                \
3299       {                                                                 \
3300         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
3301         chars_96 = -1;                                                  \
3302         break;    /* Skip the rest of the macro.  */                    \
3303       }                                                                 \
3304     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
3305     if (id == charset_jisx0201_roman)                                   \
3306       {                                                                 \
3307         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
3308           id = charset_ascii;                                           \
3309       }                                                                 \
3310     else if (id == charset_jisx0208_1978)                               \
3311       {                                                                 \
3312         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
3313           id = charset_jisx0208;                                        \
3314       }                                                                 \
3315     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
3316     /* If there was an invalid designation to REG previously, and this  \
3317        designation is ASCII to REG, we should keep this designation     \
3318        sequence.  */                                                    \
3319     if (prev == -2 && id == charset_ascii)                              \
3320       chars_96 = -1;                                                    \
3321   } while (0)
3322
3323
3324 /* Handle these composition sequences (ALT: alternate char):
3325
3326    (1) relative composition: ESC 0 CHAR ... ESC 1
3327    (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3328    (3) altchar composition:  ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3329    (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3330
3331    When the start sequence (ESC 0/2/3/4) is found, this annotation
3332    header is produced.
3333
3334         [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3335
3336    Then, upon reading CHAR or RULE (one or two bytes), these codes are
3337    produced until the end sequence (ESC 1) is found:
3338
3339    (1) CHAR ... CHAR
3340    (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3341    (3) ALT ... ALT -1 -1 CHAR ... CHAR
3342    (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3343
3344    When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3345    annotation header are updated as below:
3346
3347    (1) LENGTH: unchanged,  NCHARS: number of CHARs
3348    (2) LENGTH: unchanged,  NCHARS: number of CHARs
3349    (3) LENGTH: += number of ALTs + 2,  NCHARS: number of CHARs
3350    (4) LENGTH: += number of ALTs * 3,  NCHARS: number of CHARs
3351
3352    If an error is found while composing, the annotation header is
3353    changed to:
3354
3355         [ ESC '0'/'2'/'3'/'4' -2 0 -1 ]
3356
3357    and the sequence [ -2 DECODED-RULE ] is changed to the original
3358    byte sequence as below:
3359         o the original byte sequence is B: [ B -1 ]
3360         o the original byte sequence is B1 B2: [ B1 B2 ]
3361    and the sequence [ -1 -1 ] is changed to the original byte
3362    sequence:
3363         [ ESC '0' ]
3364 */
3365
3366 /* Decode a composition rule C1 and maybe one more byte from the
3367    source, and set RULE to the encoded composition rule, NBYTES to the
3368    length of the composition rule.  If the rule is invalid, set RULE
3369    to some negative value.

        C1 is not a parameter; it is read from the enclosing scope.  RULE
        and NBYTES must be lvalues.

        When C1 is below 32, RULE ends up negative and NBYTES is left
        untouched.

        Old format (C1 - 32 is below 81): the rule is one byte.  It is
        split into gref = value / 9 and nref = value % 9, a reference of
        4 is replaced by 10, and the pair is passed to
        COMPOSITION_ENCODE_RULE.  NBYTES is 1.

        New format: a second byte is fetched with ONE_MORE_BYTE, RULE
        becomes COMPOSITION_ENCODE_RULE (C1 - 32 - 81, byte - 32), and
        0x100 is added when that result is not negative.  NBYTES is 2.

        NOTE(review): ONE_MORE_BYTE and COMPOSITION_ENCODE_RULE are
        defined elsewhere; ONE_MORE_BYTE presumably may leave through
        the enclosing function's no_more_source label -- confirm.  */
3370
3371 #define DECODE_COMPOSITION_RULE(rule, nbytes)                           \
3372   do {                                                                  \
3373     rule = c1 - 32;                                                     \
3374     if (rule < 0)                                                       \
3375       break;                                                            \
3376     if (rule < 81)              /* old format (before ver.21) */        \
3377       {                                                                 \
3378         int gref = (rule) / 9;                                          \
3379         int nref = (rule) % 9;                                          \
3380         if (gref == 4) gref = 10;                                       \
3381         if (nref == 4) nref = 10;                                       \
3382         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
3383         nbytes = 1;                                                     \
3384       }                                                                 \
3385     else                        /* new format (after ver.21) */         \
3386       {                                                                 \
3387         int c;                                                          \
3388                                                                         \
3389         ONE_MORE_BYTE (c);                                              \
3390         rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32);             \
3391         if (rule >= 0)                                                  \
3392           rule += 0x100;   /* to distinguish it from the old format */  \
3393         nbytes = 2;                                                     \
3394       }                                                                 \
3395   } while (0)
3396
     /* Overwrite charbuf[idx] and charbuf[idx + 1] with the source
        byte(s) that the decoded composition rule RULE stands for.
        finish_composition calls this on a [ -2 RULE ] pair.  charbuf,
        idx and new_chars are taken from the enclosing scope.

        RULE below 0x100 (old format): charbuf[idx] becomes the single
        byte 32 + gref * 9 + nref, with a reference of 10 mapped back
        to 4; charbuf[idx + 1] becomes -1; new_chars grows by one.

        Otherwise (new format): the two slots become 32 + 81 + gref and
        32 + nref, and new_chars grows by two.

        RULE is expanded three times, each time before charbuf[idx + 1]
        is written, so passing charbuf[idx + 1] itself as RULE works.  */
3397 #define ENCODE_COMPOSITION_RULE(rule)                           \
3398   do {                                                          \
3399     int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3400                                                                 \
3401     if (rule < 0x100)           /* old format */                \
3402       {                                                         \
3403         if (gref == 10) gref = 4;                               \
3404         if (nref == 10) nref = 4;                               \
3405         charbuf[idx] = 32 + gref * 9 + nref;                    \
3406         charbuf[idx + 1] = -1;                                  \
3407         new_chars++;                                            \
3408       }                                                         \
3409     else                                /* new format */        \
3410       {                                                         \
3411         charbuf[idx] = 32 + 81 + gref;                          \
3412         charbuf[idx + 1] = 32 + nref;                           \
3413         new_chars += 2;                                         \
3414       }                                                         \
3415   } while (0)
3416
3417 /* Finish the current composition as invalid.

        CHARBUF points just past the data stored so far for the
        composition; the data starts CMP_STATUS->length slots before it.
        The first five slots are overwritten with ESC, the digit of the
        start sequence that matches CMP_STATUS->method, and -2, 0, -1.

        For methods at or above COMPOSITION_WITH_RULE the remaining slots
        are scanned: a -2 slot and the slot after it are rewritten by
        ENCODE_COMPOSITION_RULE, and a -1 slot and the slot after it are
        rewritten as ESC '0'.

        CMP_STATUS->state is reset to COMPOSING_NO.  The return value is
        CMP_STATUS->nchars, plus one or two for each rule rewritten, plus
        two for each -1 pair rewritten.

        The locals charbuf, idx and new_chars are referenced by name
        inside ENCODE_COMPOSITION_RULE and must keep these names.  */
3418
3419 static int finish_composition (int *, struct composition_status *);
3420
3421 static int
3422 finish_composition (int *charbuf, struct composition_status *cmp_status)
3423 {
3424   int idx = - cmp_status->length;  /* Negative index of the first slot.  */
3425   int new_chars;
3426
3427   /* Recover the original ESC sequence */
3428   charbuf[idx++] = ISO_CODE_ESC;
3429   charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3430                     : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3431                     : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3432                     /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3433                     : '4');
     /* NOTE(review): -2 0 and -1 are presumably filler that the consumer
        of charbuf skips as annotations of length 2 and 1 -- confirm.  */
3434   charbuf[idx++] = -2;
3435   charbuf[idx++] = 0;
3436   charbuf[idx++] = -1;
3437   new_chars = cmp_status->nchars;
3438   if (cmp_status->method >= COMPOSITION_WITH_RULE)
3439     for (; idx < 0; idx++)
3440       {
3441         int elt = charbuf[idx];
3442
3443         if (elt == -2)
3444           {
3445             ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3446             idx++;  /* Step over the second slot just rewritten.  */
3447           }
3448         else if (elt == -1)
3449           {
3450             charbuf[idx++] = ISO_CODE_ESC;
3451             charbuf[idx] = '0';
3452             new_chars += 2;
3453           }
3454       }
3455   cmp_status->state = COMPOSING_NO;
3456   return new_chars;
3457 }
3458
3459 /* If characters are under composition, finish the composition as
        invalid.  Uses cmp_status, charbuf and char_offset from the
        enclosing scope; char_offset is advanced by the value that
        finish_composition returns.  */
3460 #define MAYBE_FINISH_COMPOSITION()                              \
3461   do {                                                          \
3462     if (cmp_status->state != COMPOSING_NO)                      \
3463       char_offset += finish_composition (charbuf, cmp_status);  \
3464   } while (0)
3465
3466 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3467
3468    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3469    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3470    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3471    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3472
3473    Produce this annotation sequence now:
3474
3475    [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]

        NOTE(review): five slots, matching the five header slots that
        finish_composition rewrites and the two zero arguments passed to
        ADD_COMPOSITION_DATA below; ADD_COMPOSITION_DATA and
        MAX_ANNOTATION_LENGTH are defined elsewhere -- confirm.

        C1 is the byte following ESC.  The macro uses cmp_status, charbuf,
        char_offset and coding from the enclosing scope.

        ESC 0 met while the ALT part of an altchar composition is being
        read (COMPOSING_COMPONENT_CHAR with COMPOSITION_WITH_ALTCHARS, or
        COMPOSING_COMPONENT_RULE with COMPOSITION_WITH_RULE_ALTCHARS) does
        not start a new composition: two -1 slots are appended, the state
        becomes COMPOSING_CHAR and cmp_status->length grows by two.

        In every other case a composition still in progress is finished
        as invalid, and a new one is started: the method is chosen from
        C1, the state is COMPOSING_CHAR for '0' and '2' and
        COMPOSING_COMPONENT_CHAR otherwise, the header is appended, the
        counters are reset and coding->annotated is set.
3476 */
3477
3478 #define DECODE_COMPOSITION_START(c1)                                       \
3479   do {                                                                     \
3480     if (c1 == '0'                                                          \
3481         && ((cmp_status->state == COMPOSING_COMPONENT_CHAR                 \
3482              && cmp_status->method == COMPOSITION_WITH_ALTCHARS)           \
3483             || (cmp_status->state == COMPOSING_COMPONENT_RULE              \
3484                 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3485       {                                                                    \
3486         *charbuf++ = -1;                                                   \
3487         *charbuf++= -1;                                                    \
3488         cmp_status->state = COMPOSING_CHAR;                                \
3489         cmp_status->length += 2;                                           \
3490       }                                                                    \
3491     else                                                                   \
3492       {                                                                    \
3493         MAYBE_FINISH_COMPOSITION ();                                       \
3494         cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE             \
3495                               : c1 == '2' ? COMPOSITION_WITH_RULE          \
3496                               : c1 == '3' ? COMPOSITION_WITH_ALTCHARS      \
3497                               : COMPOSITION_WITH_RULE_ALTCHARS);           \
3498         cmp_status->state                                                  \
3499           = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR);       \
3500         ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method);          \
3501         cmp_status->length = MAX_ANNOTATION_LENGTH;                        \
3502         cmp_status->nchars = cmp_status->ncomps = 0;                       \
3503         coding->annotated = 1;                                             \
3504       }                                                                    \
3505   } while (0)
3506
3507
3508 /* Handle composition end sequence ESC 1.

        Uses cmp_status, charbuf and char_offset from the enclosing
        scope, and jumps to the enclosing function's invalid_code label
        on error.

        The end sequence is rejected, after finishing the composition as
        invalid, when no CHAR has been stored (nchars is 0) or when the
        state does not fit the method: COMPOSITION_WITH_RULE must not be
        in COMPOSING_CHAR state, and every other method must be.

        Otherwise the header found cmp_status->length slots before
        charbuf is patched.  Its first slot is decreased by ncomps + 2
        for COMPOSITION_WITH_ALTCHARS, or by ncomps * 3 for
        COMPOSITION_WITH_RULE_ALTCHARS, and its third slot receives
        nchars.  char_offset grows by nchars and the state returns to
        COMPOSING_NO.  */
3509
3510 #define DECODE_COMPOSITION_END()                                        \
3511   do {                                                                  \
3512     if (cmp_status->nchars == 0                                         \
3513         || ((cmp_status->state == COMPOSING_CHAR)                       \
3514             == (cmp_status->method == COMPOSITION_WITH_RULE)))          \
3515       {                                                                 \
3516         MAYBE_FINISH_COMPOSITION ();                                    \
3517         goto invalid_code;                                              \
3518       }                                                                 \
3519     if (cmp_status->method == COMPOSITION_WITH_ALTCHARS)                \
3520       charbuf[- cmp_status->length] -= cmp_status->ncomps + 2;          \
3521     else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)      \
3522       charbuf[- cmp_status->length] -= cmp_status->ncomps * 3;          \
3523     charbuf[- cmp_status->length + 2] = cmp_status->nchars;             \
3524     char_offset += cmp_status->nchars;                                  \
3525     cmp_status->state = COMPOSING_NO;                                   \
3526   } while (0)
3527
3528 /* Store a composition rule RULE in charbuf, and update cmp_status.

        Two slots are appended, the marker -2 and then RULE, so
        cmp_status->length grows by two.  charbuf and cmp_status come
        from the enclosing scope.

        NOTE(review): the decrement of cmp_status->state presumably steps
        from a rule-expecting state back to the matching char-expecting
        state; that depends on the order of the state enumeration, which
        is declared elsewhere -- confirm.  */
3529
3530 #define STORE_COMPOSITION_RULE(rule)    \
3531   do {                                  \
3532     *charbuf++ = -2;                    \
3533     *charbuf++ = rule;                  \
3534     cmp_status->length += 2;            \
3535     cmp_status->state--;                \
3536   } while (0)
3537
3538 /* Store a composed char or a component char C in charbuf, and update
3539    cmp_status.

        One slot is appended and cmp_status->length grows by one.  C is
        counted in nchars when the state is COMPOSING_CHAR and in ncomps
        otherwise.  charbuf and cmp_status come from the enclosing scope.

        The state is then incremented when the method is
        COMPOSITION_WITH_RULE, or when it is
        COMPOSITION_WITH_RULE_ALTCHARS and the state is
        COMPOSING_COMPONENT_CHAR.

        NOTE(review): the increment presumably moves to the state that
        expects a rule next; that depends on the order of the state
        enumeration, which is declared elsewhere -- confirm.  */
3540
3541 #define STORE_COMPOSITION_CHAR(c)                                       \
3542   do {                                                                  \
3543     *charbuf++ = (c);                                                   \
3544     cmp_status->length++;                                               \
3545     if (cmp_status->state == COMPOSING_CHAR)                            \
3546       cmp_status->nchars++;                                             \
3547     else                                                                \
3548       cmp_status->ncomps++;                                             \
3549     if (cmp_status->method == COMPOSITION_WITH_RULE                     \
3550         || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS        \
3551             && cmp_status->state == COMPOSING_COMPONENT_CHAR))          \
3552       cmp_status->state++;                                              \
3553   } while (0)
3554
3555
3556 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3557
3558 static void
3559 decode_coding_iso_2022 (struct coding_system *coding)
3560 {
3561   const unsigned char *src = coding->source + coding->consumed;
3562   const unsigned char *src_end = coding->source + coding->src_bytes;
3563   const unsigned char *src_base;
3564   int *charbuf = coding->charbuf + coding->charbuf_used;
3565   /* We may produce two annocations (charset and composition) in one
3566      loop and one more charset annocation at the end.  */
3567   int *charbuf_end
3568     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 3);
3569   int consumed_chars = 0, consumed_chars_base;
3570   int multibytep = coding->src_multibyte;
3571   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3572   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3573   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3574   int charset_id_2, charset_id_3;
3575   struct charset *charset;
3576   int c;
3577   struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3578   Lisp_Object attrs, charset_list;
3579   int char_offset = coding->produced_char;
3580   int last_offset = char_offset;
3581   int last_id = charset_ascii;
3582   int eol_crlf =
3583     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3584   int byte_after_cr = -1;
3585   int i;
3586
3587   CODING_GET_INFO (coding, attrs, charset_list);
3588   setup_iso_safe_charsets (attrs);
3589   /* Charset list may have been changed.  */
3590   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3591   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3592
3593   if (cmp_status->state != COMPOSING_NO)
3594     {
3595       for (i = 0; i < cmp_status->length; i++)
3596         *charbuf++ = cmp_status->carryover[i];
3597       coding->annotated = 1;
3598     }
3599
3600   while (1)
3601     {
3602       int c1, c2, c3;
3603
3604       src_base = src;
3605       consumed_chars_base = consumed_chars;
3606
3607       if (charbuf >= charbuf_end)
3608         {
3609           if (byte_after_cr >= 0)
3610             src_base--;
3611           break;
3612         }
3613
3614       if (byte_after_cr >= 0)
3615         c1 = byte_after_cr, byte_after_cr = -1;
3616       else
3617         ONE_MORE_BYTE (c1);
3618       if (c1 < 0)
3619         goto invalid_code;
3620
3621       if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3622         {
3623           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3624           char_offset++;
3625           CODING_ISO_EXTSEGMENT_LEN (coding)--;
3626           continue;
3627         }
3628
3629       if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3630         {
3631           if (c1 == ISO_CODE_ESC)
3632             {
3633               if (src + 1 >= src_end)
3634                 goto no_more_source;
3635               *charbuf++ = ISO_CODE_ESC;
3636               char_offset++;
3637               if (src[0] == '%' && src[1] == '@')
3638                 {
3639                   src += 2;
3640                   consumed_chars += 2;
3641                   char_offset += 2;
3642                   /* We are sure charbuf can contain two more chars. */
3643                   *charbuf++ = '%';
3644                   *charbuf++ = '@';
3645                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3646                 }
3647             }
3648           else
3649             {
3650               *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3651               char_offset++;
3652             }
3653           continue;
3654         }
3655
3656       if ((cmp_status->state == COMPOSING_RULE
3657            || cmp_status->state == COMPOSING_COMPONENT_RULE)
3658           && c1 != ISO_CODE_ESC)
3659         {
3660           int rule, nbytes;
3661
3662           DECODE_COMPOSITION_RULE (rule, nbytes);
3663           if (rule < 0)
3664             goto invalid_code;
3665           STORE_COMPOSITION_RULE (rule);
3666           continue;
3667         }
3668
3669       /* We produce at most one character.  */
3670       switch (iso_code_class [c1])
3671         {
3672         case ISO_0x20_or_0x7F:
3673           if (charset_id_0 < 0
3674               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3675             /* This is SPACE or DEL.  */
3676             charset = CHARSET_FROM_ID (charset_ascii);
3677           else
3678             charset = CHARSET_FROM_ID (charset_id_0);
3679           break;
3680
3681         case ISO_graphic_plane_0:
3682           if (charset_id_0 < 0)
3683             charset = CHARSET_FROM_ID (charset_ascii);
3684           else
3685             charset = CHARSET_FROM_ID (charset_id_0);
3686           break;
3687
3688         case ISO_0xA0_or_0xFF:
3689           if (charset_id_1 < 0
3690               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3691               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3692             goto invalid_code;
3693           /* This is a graphic character, we fall down ... */
3694
3695         case ISO_graphic_plane_1:
3696           if (charset_id_1 < 0)
3697             goto invalid_code;
3698           charset = CHARSET_FROM_ID (charset_id_1);
3699           break;
3700
3701         case ISO_control_0:
3702           if (eol_crlf && c1 == '\r')
3703             ONE_MORE_BYTE (byte_after_cr);
3704           MAYBE_FINISH_COMPOSITION ();
3705           charset = CHARSET_FROM_ID (charset_ascii);
3706           break;
3707
3708         case ISO_control_1:
3709           goto invalid_code;
3710
3711         case ISO_shift_out:
3712           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3713               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3714             goto invalid_code;
3715           CODING_ISO_INVOCATION (coding, 0) = 1;
3716           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3717           continue;
3718
3719         case ISO_shift_in:
3720           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3721             goto invalid_code;
3722           CODING_ISO_INVOCATION (coding, 0) = 0;
3723           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3724           continue;
3725
3726         case ISO_single_shift_2_7:
3727           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))
3728             goto invalid_code;
3729         case ISO_single_shift_2:
3730           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3731             goto invalid_code;
3732           /* SS2 is handled as an escape sequence of ESC 'N' */
3733           c1 = 'N';
3734           goto label_escape_sequence;
3735
3736         case ISO_single_shift_3:
3737           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3738             goto invalid_code;
3739           /* SS2 is handled as an escape sequence of ESC 'O' */
3740           c1 = 'O';
3741           goto label_escape_sequence;
3742
3743         case ISO_control_sequence_introducer:
3744           /* CSI is handled as an escape sequence of ESC '[' ...  */
3745           c1 = '[';
3746           goto label_escape_sequence;
3747
3748         case ISO_escape:
3749           ONE_MORE_BYTE (c1);
3750         label_escape_sequence:
3751           /* Escape sequences handled here are invocation,
3752              designation, direction specification, and character
3753              composition specification.  */
3754           switch (c1)
3755             {
3756             case '&':           /* revision of following character set */
3757               ONE_MORE_BYTE (c1);
3758               if (!(c1 >= '@' && c1 <= '~'))
3759                 goto invalid_code;
3760               ONE_MORE_BYTE (c1);
3761               if (c1 != ISO_CODE_ESC)
3762                 goto invalid_code;
3763               ONE_MORE_BYTE (c1);
3764               goto label_escape_sequence;
3765
3766             case '$':           /* designation of 2-byte character set */
3767               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3768                 goto invalid_code;
3769               {
3770                 int reg, chars96;
3771
3772                 ONE_MORE_BYTE (c1);
3773                 if (c1 >= '@' && c1 <= 'B')
3774                   {     /* designation of JISX0208.1978, GB2312.1980,
3775                            or JISX0208.1980 */
3776                     reg = 0, chars96 = 0;
3777                   }
3778                 else if (c1 >= 0x28 && c1 <= 0x2B)
3779                   { /* designation of DIMENSION2_CHARS94 character set */
3780                     reg = c1 - 0x28, chars96 = 0;
3781                     ONE_MORE_BYTE (c1);
3782                   }
3783                 else if (c1 >= 0x2C && c1 <= 0x2F)
3784                   { /* designation of DIMENSION2_CHARS96 character set */
3785                     reg = c1 - 0x2C, chars96 = 1;
3786                     ONE_MORE_BYTE (c1);
3787                   }
3788                 else
3789                   goto invalid_code;
3790                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3791                 /* We must update these variables now.  */
3792                 if (reg == 0)
3793                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3794                 else if (reg == 1)
3795                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3796                 if (chars96 < 0)
3797                   goto invalid_code;
3798               }
3799               continue;
3800
3801             case 'n':           /* invocation of locking-shift-2 */
3802               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3803                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3804                 goto invalid_code;
3805               CODING_ISO_INVOCATION (coding, 0) = 2;
3806               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3807               continue;
3808
3809             case 'o':           /* invocation of locking-shift-3 */
3810               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3811                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3812                 goto invalid_code;
3813               CODING_ISO_INVOCATION (coding, 0) = 3;
3814               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3815               continue;
3816
3817             case 'N':           /* invocation of single-shift-2 */
3818               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3819                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3820                 goto invalid_code;
3821               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3822               if (charset_id_2 < 0)
3823                 charset = CHARSET_FROM_ID (charset_ascii);
3824               else
3825                 charset = CHARSET_FROM_ID (charset_id_2);
3826               ONE_MORE_BYTE (c1);
3827               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3828                 goto invalid_code;
3829               break;
3830
3831             case 'O':           /* invocation of single-shift-3 */
3832               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3833                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3834                 goto invalid_code;
3835               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3836               if (charset_id_3 < 0)
3837                 charset = CHARSET_FROM_ID (charset_ascii);
3838               else
3839                 charset = CHARSET_FROM_ID (charset_id_3);
3840               ONE_MORE_BYTE (c1);
3841               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3842                 goto invalid_code;
3843               break;
3844
3845             case '0': case '2': case '3': case '4': /* start composition */
3846               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3847                 goto invalid_code;
3848               if (last_id != charset_ascii)
3849                 {
3850                   ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3851                   last_id = charset_ascii;
3852                   last_offset = char_offset;
3853                 }
3854               DECODE_COMPOSITION_START (c1);
3855               continue;
3856
3857             case '1':           /* end composition */
3858               if (cmp_status->state == COMPOSING_NO)
3859                 goto invalid_code;
3860               DECODE_COMPOSITION_END ();
3861               continue;
3862
3863             case '[':           /* specification of direction */
3864               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION))
3865                 goto invalid_code;
3866               /* For the moment, nested direction is not supported.
3867                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3868                  left-to-right, and nozero means right-to-left.  */
3869               ONE_MORE_BYTE (c1);
3870               switch (c1)
3871                 {
3872                 case ']':       /* end of the current direction */
3873                   coding->mode &= ~CODING_MODE_DIRECTION;
3874
3875                 case '0':       /* end of the current direction */
3876                 case '1':       /* start of left-to-right direction */
3877                   ONE_MORE_BYTE (c1);
3878                   if (c1 == ']')
3879                     coding->mode &= ~CODING_MODE_DIRECTION;
3880                   else
3881                     goto invalid_code;
3882                   break;
3883
3884                 case '2':       /* start of right-to-left direction */
3885                   ONE_MORE_BYTE (c1);
3886                   if (c1 == ']')
3887                     coding->mode |= CODING_MODE_DIRECTION;
3888                   else
3889                     goto invalid_code;
3890                   break;
3891
3892                 default:
3893                   goto invalid_code;
3894                 }
3895               continue;
3896
3897             case '%':
3898               ONE_MORE_BYTE (c1);
3899               if (c1 == '/')
3900                 {
3901                   /* CTEXT extended segment:
3902                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3903                      We keep these bytes as is for the moment.
3904                      They may be decoded by post-read-conversion.  */
3905                   int dim, M, L;
3906                   int size;
3907
3908                   ONE_MORE_BYTE (dim);
3909                   if (dim < 0 || dim > 4)
3910                     goto invalid_code;
3911                   ONE_MORE_BYTE (M);
3912                   if (M < 128)
3913                     goto invalid_code;
3914                   ONE_MORE_BYTE (L);
3915                   if (L < 128)
3916                     goto invalid_code;
3917                   size = ((M - 128) * 128) + (L - 128);
3918                   if (charbuf + 6 > charbuf_end)
3919                     goto break_loop;
3920                   *charbuf++ = ISO_CODE_ESC;
3921                   *charbuf++ = '%';
3922                   *charbuf++ = '/';
3923                   *charbuf++ = dim;
3924                   *charbuf++ = BYTE8_TO_CHAR (M);
3925                   *charbuf++ = BYTE8_TO_CHAR (L);
3926                   CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3927                 }
3928               else if (c1 == 'G')
3929                 {
3930                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3931                      ESC % G --UTF-8-BYTES-- ESC % @
3932                      We keep these bytes as is for the moment.
3933                      They may be decoded by post-read-conversion.  */
3934                   if (charbuf + 3 > charbuf_end)
3935                     goto break_loop;
3936                   *charbuf++ = ISO_CODE_ESC;
3937                   *charbuf++ = '%';
3938                   *charbuf++ = 'G';
3939                   CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3940                 }
3941               else
3942                 goto invalid_code;
3943               continue;
3944               break;
3945
3946             default:
3947               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3948                 goto invalid_code;
3949               {
3950                 int reg, chars96;
3951
3952                 if (c1 >= 0x28 && c1 <= 0x2B)
3953                   { /* designation of DIMENSION1_CHARS94 character set */
3954                     reg = c1 - 0x28, chars96 = 0;
3955                     ONE_MORE_BYTE (c1);
3956                   }
3957                 else if (c1 >= 0x2C && c1 <= 0x2F)
3958                   { /* designation of DIMENSION1_CHARS96 character set */
3959                     reg = c1 - 0x2C, chars96 = 1;
3960                     ONE_MORE_BYTE (c1);
3961                   }
3962                 else
3963                   goto invalid_code;
3964                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3965                 /* We must update these variables now.  */
3966                 if (reg == 0)
3967                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3968                 else if (reg == 1)
3969                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3970                 if (chars96 < 0)
3971                   goto invalid_code;
3972               }
3973               continue;
3974             }
3975         }
3976
3977       if (cmp_status->state == COMPOSING_NO
3978           && charset->id != charset_ascii
3979           && last_id != charset->id)
3980         {
3981           if (last_id != charset_ascii)
3982             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3983           last_id = charset->id;
3984           last_offset = char_offset;
3985         }
3986
3987       /* Now we know CHARSET and 1st position code C1 of a character.
3988          Produce a decoded character while getting 2nd and 3rd
3989          position codes C2, C3 if necessary.  */
3990       if (CHARSET_DIMENSION (charset) > 1)
3991         {
3992           ONE_MORE_BYTE (c2);
3993           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0)
3994               || ((c1 & 0x80) != (c2 & 0x80)))
3995             /* C2 is not in a valid range.  */
3996             goto invalid_code;
3997           if (CHARSET_DIMENSION (charset) == 2)
3998             c1 = (c1 << 8) | c2;
3999           else
4000             {
4001               ONE_MORE_BYTE (c3);
4002               if (c3 < 0x20 || (c3 >= 0x80 && c3 < 0xA0)
4003                   || ((c1 & 0x80) != (c3 & 0x80)))
4004                 /* C3 is not in a valid range.  */
4005                 goto invalid_code;
4006               c1 = (c1 << 16) | (c2 << 8) | c2;
4007             }
4008         }
4009       c1 &= 0x7F7F7F;
4010       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
4011       if (c < 0)
4012         {
4013           MAYBE_FINISH_COMPOSITION ();
4014           for (; src_base < src; src_base++, char_offset++)
4015             {
4016               if (ASCII_BYTE_P (*src_base))
4017                 *charbuf++ = *src_base;
4018               else
4019                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
4020             }
4021         }
4022       else if (cmp_status->state == COMPOSING_NO)
4023         {
4024           *charbuf++ = c;
4025           char_offset++;
4026         }
4027       else if ((cmp_status->state == COMPOSING_CHAR
4028                 ? cmp_status->nchars
4029                 : cmp_status->ncomps)
4030                >= MAX_COMPOSITION_COMPONENTS)
4031         {
4032           /* Too long composition.  */
4033           MAYBE_FINISH_COMPOSITION ();
4034           *charbuf++ = c;
4035           char_offset++;
4036         }
4037       else
4038         STORE_COMPOSITION_CHAR (c);
4039       continue;
4040
4041     invalid_code:
4042       MAYBE_FINISH_COMPOSITION ();
4043       src = src_base;
4044       consumed_chars = consumed_chars_base;
4045       ONE_MORE_BYTE (c);
4046       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4047       char_offset++;
4048       coding->errors++;
4049       continue;
4050
4051     break_loop:
4052       break;
4053     }
4054
4055  no_more_source:
4056   if (cmp_status->state != COMPOSING_NO)
4057     {
4058       if (coding->mode & CODING_MODE_LAST_BLOCK)
4059         MAYBE_FINISH_COMPOSITION ();
4060       else
4061         {
4062           charbuf -= cmp_status->length;
4063           for (i = 0; i < cmp_status->length; i++)
4064             cmp_status->carryover[i] = charbuf[i];
4065         }
4066     }
4067   else if (last_id != charset_ascii)
4068     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4069   coding->consumed_char += consumed_chars_base;
4070   coding->consumed = src_base - coding->source;
4071   coding->charbuf_used = charbuf - coding->charbuf;
4072 }
4073
4074
4075 /* ISO2022 encoding stuff.  */
4076
4077 /*
4078    It is not enough to say just "ISO2022" on encoding, we have to
4079    specify more details.  In Emacs, each coding system of ISO2022
4080    variant has the following specifications:
4081         1. Initial designation to G0 thru G3.
4082         2. Allows short-form designation?
4083         3. ASCII should be designated to G0 before control characters?
4084         4. ASCII should be designated to G0 at end of line?
4085         5. 7-bit environment or 8-bit environment?
4086         6. Use locking-shift?
4087         7. Use Single-shift?
4088    And the following two are only for Japanese:
4089         8. Use ASCII in place of JIS0201-1976-Roman?
4090         9. Use JISX0208-1983 in place of JISX0208-1978?
4091    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
4092    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
4093    details.
4094 */
4095
/* Emit the escape sequence that designates CHARSET to graphic
   register REG and record that designation in CODING.  The bytes
   produced are, in order:

     - `ESC & <'@' + revision>', only when CODING enables revision
       numbers and CHARSET has a non-negative one;
     - ESC;
     - `$', only when CHARSET is not one-dimensional;
     - the intermediate byte selecting REG: one of `()*+' for a
       94-character set, one of `,-./' for a 96-character set;
     - the final byte of CHARSET.

   The intermediate byte is left out (short form) for a
   multi-dimensional 94-character set designated to register 0 whose
   final byte is '@', 'A', or 'B', unless CODING asks for the long
   form.

   CHARSET and REG are evaluated several times.  The destination is
   not a parameter: the EMIT_* macros are expected to find what they
   need at the expansion site.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do                                                                    \
    {                                                                   \
      unsigned char final_char = CHARSET_ISO_FINAL (charset);           \
      /* Intermediate bytes for registers 0..3 of CHARSET's kind.  */   \
      const char *intermediates                                         \
        = CHARSET_ISO_CHARS_96 (charset) ? ",-./" : "()*+";             \
      int revision = -1;                                                \
                                                                        \
      if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)         \
        revision = CHARSET_ISO_REVISION (charset);                      \
      if (revision >= 0)                                                \
        {                                                               \
          EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                     \
          EMIT_ONE_BYTE ('@' + revision);                               \
        }                                                               \
                                                                        \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                               \
      if (CHARSET_DIMENSION (charset) != 1)                             \
        EMIT_ONE_ASCII_BYTE ('$');                                      \
      /* Only a multi-dimensional 94-character set going to register    \
         0 with final byte '@'..'B' may drop the intermediate byte.  */ \
      if (CHARSET_DIMENSION (charset) == 1                              \
          || CHARSET_ISO_CHARS_96 (charset)                             \
          || (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM)    \
          || reg != 0                                                   \
          || final_char < '@' || final_char > 'B')                      \
        EMIT_ONE_ASCII_BYTE (intermediates[reg]);                       \
      EMIT_ONE_ASCII_BYTE (final_char);                                 \
                                                                        \
      CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);      \
    }                                                                   \
  while (0)
4143
4144
4145 /* The following two macros produce codes (control character or escape
4146    sequence) for ISO2022 single-shift functions (single-shift-2 and
4147    single-shift-3).  */
4148
/* Single-shift-2: emit `ESC N' when the coding system is flagged as
   seven-bit, the one-byte control ISO_CODE_SS2 otherwise, then raise
   the single-shifting state of CODING.  */
#define ENCODE_SINGLE_SHIFT_2                                           \
  do                                                                    \
    {                                                                   \
      if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))   \
        EMIT_ONE_BYTE (ISO_CODE_SS2);                                   \
      else                                                              \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                       \
      CODING_ISO_SINGLE_SHIFTING (coding) = 1;                          \
    }                                                                   \
  while (0)
4157
4158
/* Single-shift-3: emit `ESC O' when the coding system is flagged as
   seven-bit, the one-byte control ISO_CODE_SS3 otherwise, then raise
   the single-shifting state of CODING.  */
#define ENCODE_SINGLE_SHIFT_3                                           \
  do                                                                    \
    {                                                                   \
      if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS))   \
        EMIT_ONE_BYTE (ISO_CODE_SS3);                                   \
      else                                                              \
        EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                       \
      CODING_ISO_SINGLE_SHIFTING (coding) = 1;                          \
    }                                                                   \
  while (0)
4167
4168
4169 /* The following four macros produce codes (control character or
4170    escape sequence) for ISO2022 locking-shift functions (shift-in,
4171    shift-out, locking-shift-2, and locking-shift-3).  */
4172
/* Shift-in: emit ISO_CODE_SI and note that graphic register 0 is now
   the one invoked to graphic plane 0.  */
#define ENCODE_SHIFT_IN                                                 \
  do                                                                    \
    {                                                                   \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                                \
      CODING_ISO_INVOCATION (coding, 0) = 0;                            \
    }                                                                   \
  while (0)
4178
4179
/* Shift-out: emit ISO_CODE_SO and note that graphic register 1 is now
   the one invoked to graphic plane 0.  */
#define ENCODE_SHIFT_OUT                                                \
  do                                                                    \
    {                                                                   \
      EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                                \
      CODING_ISO_INVOCATION (coding, 0) = 1;                            \
    }                                                                   \
  while (0)
4185
4186
/* Locking-shift-2: emit `ESC n' and note that graphic register 2 is
   now the one invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                                          \
  do                                                                    \
    {                                                                   \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');                         \
      CODING_ISO_INVOCATION (coding, 0) = 2;                            \
    }                                                                   \
  while (0)
4192
4193
/* Locking-shift-3: emit `ESC o' and note that graphic register 3 is
   now the one invoked to graphic plane 0.

   The final byte must be 'o'; `ESC n' is locking-shift-2, and the
   ISO2022 decoder in this file interprets the two sequences that
   way, so emitting 'n' here would make a reader invoke register 2
   while the encoder state claims register 3.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'o');           \
    CODING_ISO_INVOCATION (coding, 0) = 3;              \
  } while (0)
4199
4200
/* Encode one character of the one-dimensional charset CHARSET whose
   position code is C1, first producing whatever designation and
   invocation sequences are needed.

   When the coding system has CODING_ISO_FLAG_USE_ROMAN set, ASCII is
   replaced by JISX0201-Roman; in that case CHARSET itself is
   reassigned, so it must be an lvalue.

   The byte is emitted with the high bit clear when CHARSET is invoked
   to graphic plane 0 (or under a single shift in a seven-bit coding
   system), and with the high bit set when it is invoked to graphic
   plane 1 (or under a single shift otherwise).  A pending single
   shift is cleared once the byte is out.

   The expansion uses `coding', `dst' and `produced_chars' from the
   expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_ascii                                             \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN))     \
      {                                                                 \
        id = charset_jisx0201_roman;                                    \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (! CODING_ISO_SINGLE_SHIFTING (coding)                           \
        && id != CODING_ISO_INVOKED_CHARSET (coding, 0)                 \
        && id != CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                 \
        /* CHARSET is reachable through neither graphic plane yet.      \
           Designate and/or invoke it, then go round the loop again     \
           to emit the character itself.  */                            \
        dst = encode_invocation_designation (charset, coding, dst,      \
                                             &produced_chars);          \
        continue;                                                       \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
        else                                                            \
          EMIT_ONE_BYTE (c1 | 0x80);                                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                  \
    else                                                                \
      EMIT_ONE_BYTE (c1 | 0x80);                                        \
    break;                                                              \
  } while (1)
4243
4244
/* Encode one character of the two-dimensional charset CHARSET whose
   position codes are C1 and C2, first producing whatever designation
   and invocation sequences are needed.

   When the coding system has CODING_ISO_FLAG_USE_OLDJIS set,
   JISX0208 is replaced by JISX0208-1978; in that case CHARSET itself
   is reassigned, so it must be an lvalue.

   Both bytes are emitted with the high bit clear when CHARSET is
   invoked to graphic plane 0 (or under a single shift in a seven-bit
   coding system), and with the high bit set when it is invoked to
   graphic plane 1 (or under a single shift otherwise).  A pending
   single shift is cleared once the bytes are out.

   The expansion uses `coding', `dst' and `produced_chars' from the
   expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    int id = CHARSET_ID (charset);                                      \
                                                                        \
    if (id == charset_jisx0208                                          \
        && (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS))    \
      {                                                                 \
        id = charset_jisx0208_1978;                                     \
        charset = CHARSET_FROM_ID (id);                                 \
      }                                                                 \
                                                                        \
    if (! CODING_ISO_SINGLE_SHIFTING (coding)                           \
        && id != CODING_ISO_INVOKED_CHARSET (coding, 0)                 \
        && id != CODING_ISO_INVOKED_CHARSET (coding, 1))                \
      {                                                                 \
        /* CHARSET is reachable through neither graphic plane yet.      \
           Designate and/or invoke it, then go round the loop again     \
           to emit the character itself.  */                            \
        dst = encode_invocation_designation (charset, coding, dst,      \
                                             &produced_chars);          \
        continue;                                                       \
      }                                                                 \
                                                                        \
    if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
      {                                                                 \
        if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
          EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
        else                                                            \
          EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
        CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
      }                                                                 \
    else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
      EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                  \
    else                                                                \
      EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                        \
    break;                                                              \
  } while (1)
4287
4288
/* Encode character C as a member of CHARSET.  The code point obtained
   from ENCODE_CHAR is handed to the one-dimensional encoder as is, or
   split into its high and low bytes for the two-dimensional encoder.
   Any CHARSET whose dimension is not 1 takes the two-dimensional
   path.  CHARSET is evaluated more than once and may be reassigned by
   the macros this expands to.  */
#define ENCODE_ISO_CHARACTER(charset, c)                                   \
  do                                                                       \
    {                                                                      \
      int code = ENCODE_CHAR ((charset), (c));                             \
                                                                           \
      if (CHARSET_DIMENSION (charset) != 1)                                \
        {                                                                  \
          ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8,           \
                                           code & 0xFF);                   \
        }                                                                  \
      else                                                                 \
        {                                                                  \
          ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);               \
        }                                                                  \
    }                                                                      \
  while (0)
4298
4299
4300 /* Produce designation and invocation codes at a place pointed by DST
4301    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
4302    Return new DST.  */
4303
4304 unsigned char *
4305 encode_invocation_designation (struct charset *charset,
4306                                struct coding_system *coding,
4307                                unsigned char *dst, int *p_nchars)
4308 {
4309   int multibytep = coding->dst_multibyte;
4310   int produced_chars = *p_nchars;
4311   int reg;                      /* graphic register number */
4312   int id = CHARSET_ID (charset);
4313
4314   /* At first, check designations.  */
4315   for (reg = 0; reg < 4; reg++)
4316     if (id == CODING_ISO_DESIGNATION (coding, reg))
4317       break;
4318
4319   if (reg >= 4)
4320     {
4321       /* CHARSET is not yet designated to any graphic registers.  */
4322       /* At first check the requested designation.  */
4323       reg = CODING_ISO_REQUEST (coding, id);
4324       if (reg < 0)
4325         /* Since CHARSET requests no special designation, designate it
4326            to graphic register 0.  */
4327         reg = 0;
4328
4329       ENCODE_DESIGNATION (charset, reg, coding);
4330     }
4331
4332   if (CODING_ISO_INVOCATION (coding, 0) != reg
4333       && CODING_ISO_INVOCATION (coding, 1) != reg)
4334     {
4335       /* Since the graphic register REG is not invoked to any graphic
4336          planes, invoke it to graphic plane 0.  */
4337       switch (reg)
4338         {
4339         case 0:                 /* graphic register 0 */
4340           ENCODE_SHIFT_IN;
4341           break;
4342
4343         case 1:                 /* graphic register 1 */
4344           ENCODE_SHIFT_OUT;
4345           break;
4346
4347         case 2:                 /* graphic register 2 */
4348           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4349             ENCODE_SINGLE_SHIFT_2;
4350           else
4351             ENCODE_LOCKING_SHIFT_2;
4352           break;
4353
4354         case 3:                 /* graphic register 3 */
4355           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
4356             ENCODE_SINGLE_SHIFT_3;
4357           else
4358             ENCODE_LOCKING_SHIFT_3;
4359           break;
4360         }
4361     }
4362
4363   *p_nchars = produced_chars;
4364   return dst;
4365 }
4366
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the control sequence
   introducer: `ESC [' when the coding system has
   CODING_ISO_FLAG_SEVEN_BITS set, the one-byte control ISO_CODE_CSI
   otherwise.  The flag is tested as a bit, like every other use of it
   in the ISO2022 encoder, so that the presence of unrelated flags does
   not push a seven-bit coding system onto the eight-bit form.  It is
   an object-like macro: write it without an argument list.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
  do {                                                                  \
    if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
      EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
    else                                                                \
      EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
  } while (0)
4376
4377
/* Emit the sequence that starts right-to-left text: the control
   sequence introducer followed by `2 ]'.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is written without an argument list; with one, the expansion would
   not be a valid statement.  */
#define ENCODE_DIRECTION_R2L()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('2', ']');            \
  } while (0)
4383
4384
/* Emit the sequence that returns to left-to-right text: the control
   sequence introducer followed by `0 ]'.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is written without an argument list; with one, the expansion would
   not be a valid statement.  */
#define ENCODE_DIRECTION_L2R()                  \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    EMIT_TWO_ASCII_BYTES ('0', ']');            \
  } while (0)
4390
4391
/* Bring the graphic planes and registers of `coding' back to their
   initial state: emit shift-in if graphic plane 0 is not invoking
   register 0, then, for each register that has an initial charset
   different from its current designation, emit the sequence that
   designates the initial charset again.  Registers with no initial
   charset (negative value) are left alone.  */
#define ENCODE_RESET_PLANE_AND_REGISTER()                               \
  do                                                                    \
    {                                                                   \
      int reg;                                                          \
                                                                        \
      if (CODING_ISO_INVOCATION (coding, 0) != 0)                       \
        ENCODE_SHIFT_IN;                                                \
      for (reg = 0; reg < 4; reg++)                                     \
        {                                                               \
          int initial = CODING_ISO_INITIAL (coding, reg);               \
          struct charset *charset;                                      \
                                                                        \
          if (initial < 0                                               \
              || CODING_ISO_DESIGNATION (coding, reg) == initial)       \
            continue;                                                   \
          charset = CHARSET_FROM_ID (initial);                          \
          ENCODE_DESIGNATION (charset, reg, coding);                    \
        }                                                               \
    }                                                                   \
  while (0)
4410
4411
4412 /* Produce designation sequences of charsets in the line started from
4413    SRC to a place pointed by DST, and return updated DST.
4414
4415    If the current block ends before any end-of-line, we may fail to
4416    find all the necessary designations.  */
4417
4418 static unsigned char *
4419 encode_designation_at_bol (struct coding_system *coding, int *charbuf,
4420                            int *charbuf_end, unsigned char *dst)
4421 {
4422   struct charset *charset;
4423   /* Table of charsets to be designated to each graphic register.  */
4424   int r[4];
4425   int c, found = 0, reg;
4426   int produced_chars = 0;
4427   int multibytep = coding->dst_multibyte;
4428   Lisp_Object attrs;
4429   Lisp_Object charset_list;
4430
4431   attrs = CODING_ID_ATTRS (coding->id);
4432   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4433   if (EQ (charset_list, Qiso_2022))
4434     charset_list = Viso_2022_charset_list;
4435
4436   for (reg = 0; reg < 4; reg++)
4437     r[reg] = -1;
4438
4439   while (found < 4)
4440     {
4441       int id;
4442
4443       c = *charbuf++;
4444       if (c == '\n')
4445         break;
4446       charset = char_charset (c, charset_list, NULL);
4447       id = CHARSET_ID (charset);
4448       reg = CODING_ISO_REQUEST (coding, id);
4449       if (reg >= 0 && r[reg] < 0)
4450         {
4451           found++;
4452           r[reg] = id;
4453         }
4454     }
4455
4456   if (found)
4457     {
4458       for (reg = 0; reg < 4; reg++)
4459         if (r[reg] >= 0
4460             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
4461           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
4462     }
4463
4464   return dst;
4465 }
4466
4467 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
        Encode the CODING->charbuf_used elements of CODING->charbuf as
        ISO 2022 text, starting at CODING->destination + CODING->produced.
        A negative element starts an annotation; a charset annotation
        sets the charset preferred for the characters that follow.
        Before returning 0, record CODING_RESULT_SUCCESS, save the
        beginning-of-line designation state with CODING_ISO_BOL, and
        update CODING->produced and CODING->produced_char.

        NOTE(review): locals such as multibytep, dst, dst_end and
        produced_chars are presumably referenced by name inside the
        ASSURE_DESTINATION, EMIT_* and ENCODE_* macros (defined outside
        this function) -- confirm before renaming or removing them.  */
4468
4469 static int
4470 encode_coding_iso_2022 (struct coding_system *coding)
4471 {
4472   int multibytep = coding->dst_multibyte;
4473   int *charbuf = coding->charbuf;
4474   int *charbuf_end = charbuf + coding->charbuf_used;
4475   unsigned char *dst = coding->destination + coding->produced;
4476   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4477   int safe_room = 16;
4478   int bol_designation
4479     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
4480        && CODING_ISO_BOL (coding));
4481   int produced_chars = 0;
4482   Lisp_Object attrs, eol_type, charset_list;
4483   int ascii_compatible;
4484   int c;
4485   int preferred_charset_id = -1;
4486
4487   CODING_GET_INFO (coding, attrs, charset_list);
4488   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
       /* A vector-valued EOL type is treated as Qunix.  */
4489   if (VECTORP (eol_type))
4490     eol_type = Qunix;
4491
4492   setup_iso_safe_charsets (attrs);
4493   /* Charset list may have been changed.  */
4494   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
4495   coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
4496
4497   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4498
4499   while (charbuf < charbuf_end)
4500     {
4501       ASSURE_DESTINATION (safe_room);
4502
4503       if (bol_designation)
4504         {
4505           unsigned char *dst_prev = dst;
4506
4507           /* We have to produce designation sequences if any now.  */
4508           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
4509           bol_designation = 0;
4510           /* We are sure that designation sequences are all ASCII bytes.  */
4511           produced_chars += dst - dst_prev;
4512         }
4513
4514       c = *charbuf++;
4515
4516       if (c < 0)
4517         {
4518           /* Handle an annotation.  */
               /* The element following C selects the annotation kind.  */
4519           switch (*charbuf)
4520             {
4521             case CODING_ANNOTATE_COMPOSITION_MASK:
4522               /* Not yet implemented.  */
4523               break;
4524             case CODING_ANNOTATE_CHARSET_MASK:
4525               preferred_charset_id = charbuf[2];
                   /* Drop the preference unless the charset is a member
                      of CHARSET_LIST.  */
4526               if (preferred_charset_id >= 0
4527                   && NILP (Fmemq (make_number (preferred_charset_id),
4528                                   charset_list)))
4529                 preferred_charset_id = -1;
4530               break;
4531             default:
                   /* Any other annotation kind is unexpected here.  */
4532               abort ();
4533             }
               /* C itself has already been consumed; skip the remaining
                  -C - 1 elements of the annotation.  */
4534           charbuf += -c - 1;
4535           continue;
4536         }
4537
4538       /* Now encode the character C.  */
4539       if (c < 0x20 || c == 0x7F)
4540         {
4541           if (c == '\n'
4542               || (c == '\r' && EQ (eol_type, Qmac)))
4543             {
4544               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4545                 ENCODE_RESET_PLANE_AND_REGISTER ();
4546               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
4547                 {
4548                   int i;
4549
4550                   for (i = 0; i < 4; i++)
4551                     CODING_ISO_DESIGNATION (coding, i)
4552                       = CODING_ISO_INITIAL (coding, i);
4553                 }
                   /* If the coding system designates at beginning of line,
                      do so before the next character is encoded.  */
4554               bol_designation
4555                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
4556             }
4557           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
4558             ENCODE_RESET_PLANE_AND_REGISTER ();
4559           EMIT_ONE_ASCII_BYTE (c);
4560         }
4561       else if (ASCII_CHAR_P (c))
4562         {
4563           if (ascii_compatible)
4564             EMIT_ONE_ASCII_BYTE (c);
4565           else
4566             {
4567               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
4568               ENCODE_ISO_CHARACTER (charset, c);
4569             }
4570         }
4571       else if (CHAR_BYTE8_P (c))
4572         {
4573           c = CHAR_TO_BYTE8 (c);
4574           EMIT_ONE_BYTE (c);
4575         }
4576       else
4577         {
4578           struct charset *charset;
4579
               /* Try the charset requested by the last charset annotation
                  first, and fall back to searching CHARSET_LIST.  */
4580           if (preferred_charset_id >= 0)
4581             {
4582               charset = CHARSET_FROM_ID (preferred_charset_id);
4583               if (! CHAR_CHARSET_P (c, charset))
4584                 charset = char_charset (c, charset_list, NULL);
4585             }
4586           else
4587             charset = char_charset (c, charset_list, NULL);
               /* char_charset returned no charset for C: substitute either
                  the inhibit-substitution marker (safe encoding mode) or
                  the coding system's default character.
                  NOTE(review): the second char_charset call is not checked
                  for a null result before ENCODE_ISO_CHARACTER -- confirm
                  that default_char is always encodable.  */
4588           if (!charset)
4589             {
4590               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4591                 {
4592                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4593                   charset = CHARSET_FROM_ID (charset_ascii);
4594                 }
4595               else
4596                 {
4597                   c = coding->default_char;
4598                   charset = char_charset (c, charset_list, NULL);
4599                 }
4600             }
4601           ENCODE_ISO_CHARACTER (charset, c);
4602         }
4603     }
4604
       /* At the end of the last block, reset the plane and register state
          if the coding system asks for a reset at end of line.  */
4605   if (coding->mode & CODING_MODE_LAST_BLOCK
4606       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4607     {
4608       ASSURE_DESTINATION (safe_room);
4609       ENCODE_RESET_PLANE_AND_REGISTER ();
4610     }
4611   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4612   CODING_ISO_BOL (coding) = bol_designation;
4613   coding->produced_char += produced_chars;
4614   coding->produced = dst - coding->destination;
4615   return 0;
4616 }
4617
4618 \f
4619 /*** 8,9. SJIS and BIG5 handlers ***/
4620
4621 /* Although SJIS and BIG5 are not ISO coding systems, they are used
4622    quite widely.  So, for the moment, Emacs supports them in the bare
4623    C code.  But, in the future, they may be supported only by CCL.  */
4624
4625 /* SJIS is a coding system encoding three character sets: ASCII, right
4626    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4627    as is.  A character of charset katakana-jisx0201 is encoded by
4628    "position-code + 0x80".  A character of charset japanese-jisx0208
4629    is encoded in two bytes, with its two position-codes divided and
4630    shifted so that they fit in the ranges below.
4631
4632    --- CODE RANGE of SJIS ---
4633    (character set)      (range)
4634    ASCII                0x00 .. 0x7F
4635    KATAKANA-JISX0201    0xA0 .. 0xDF
4636    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4637             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4638    -------------------------------
4639
4640 */
4641
4642 /* BIG5 is a coding system encoding two character sets: ASCII and
4643    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4644    character set and is encoded in two bytes.
4645
4646    --- CODE RANGE of BIG5 ---
4647    (character set)      (range)
4648    ASCII                0x00 .. 0x7F
4649    Big5 (1st byte)      0xA1 .. 0xFE
4650         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4651    --------------------------
4652
4653   */
4654
4655 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4656    Check if a text is encoded in SJIS.  If an invalid byte sequence
4657    is found, set CATEGORY_MASK_SJIS in DETECT_INFO->rejected and
        return 0.  Otherwise, once the source is exhausted, OR into
        DETECT_INFO->found the mask CATEGORY_MASK_SJIS if at least one
        non-ASCII SJIS sequence was seen, and return 1.  */
4658
4659 static int
4660 detect_coding_sjis (struct coding_system *coding,
4661                     struct coding_detection_info *detect_info)
4662 {
4663   const unsigned char *src = coding->source, *src_base;
4664   const unsigned char *src_end = coding->source + coding->src_bytes;
4665   int multibytep = coding->src_multibyte;
4666   int consumed_chars = 0;
4667   int found = 0;
4668   int c;
4669   Lisp_Object attrs, charset_list;
4670   int max_first_byte_of_2_byte_code;
4671
4672   CODING_GET_INFO (coding, attrs, charset_list);
       /* With more than three charsets in the list, lead bytes up to 0xFC
          are accepted; otherwise only lead bytes up to 0xEF.  */
4673   max_first_byte_of_2_byte_code
4674     = (XINT (Flength (charset_list)) > 3 ? 0xFC : 0xEF);
4675
4676   detect_info->checked |= CATEGORY_MASK_SJIS;
4677   /* A coding system of this category is always ASCII compatible.  */
4678   src += coding->head_ascii;
4679
4680   while (1)
4681     {
           /* Remember where this character starts, for the truncation
              check at no_more_source.  */
4682       src_base = src;
4683       ONE_MORE_BYTE (c);
4684       if (c < 0x80)
4685         continue;
4686       if ((c >= 0x81 && c <= 0x9F)
4687           || (c >= 0xE0 && c <= max_first_byte_of_2_byte_code))
4688         {
               /* Lead byte of a two-byte code; validate the trail byte.  */
4689           ONE_MORE_BYTE (c);
4690           if (c < 0x40 || c == 0x7F || c > 0xFC)
4691             break;
4692           found = CATEGORY_MASK_SJIS;
4693         }
4694       else if (c >= 0xA0 && c < 0xE0)
4695         found = CATEGORY_MASK_SJIS;
4696       else
4697         break;
4698     }
       /* The loop is left by `break' only on an invalid byte.  */
4699   detect_info->rejected |= CATEGORY_MASK_SJIS;
4700   return 0;
4701
       /* NOTE(review): presumably reached from ONE_MORE_BYTE when the
          source is exhausted (the macro is defined elsewhere).  */
4702  no_more_source:
       /* SRC has advanced past SRC_BASE, so the last character was begun
          but not completed; on the last block that is a rejection.  */
4703   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4704     {
4705       detect_info->rejected |= CATEGORY_MASK_SJIS;
4706       return 0;
4707     }
4708   detect_info->found |= found;
4709   return 1;
4710 }
4711
4712 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4713    Check if a text is encoded in BIG5.  If an invalid byte sequence
4714    is found, set CATEGORY_MASK_BIG5 in DETECT_INFO->rejected and
        return 0.  Otherwise, once the source is exhausted, OR into
        DETECT_INFO->found the mask CATEGORY_MASK_BIG5 if at least one
        two-byte BIG5 sequence was seen, and return 1.  */
4715
4716 static int
4717 detect_coding_big5 (struct coding_system *coding,
4718                     struct coding_detection_info *detect_info)
4719 {
4720   const unsigned char *src = coding->source, *src_base;
4721   const unsigned char *src_end = coding->source + coding->src_bytes;
4722   int multibytep = coding->src_multibyte;
4723   int consumed_chars = 0;
4724   int found = 0;
4725   int c;
4726
4727   detect_info->checked |= CATEGORY_MASK_BIG5;
4728   /* A coding system of this category is always ASCII compatible.  */
4729   src += coding->head_ascii;
4730
4731   while (1)
4732     {
           /* Remember where this character starts, for the truncation
              check at no_more_source.  */
4733       src_base = src;
4734       ONE_MORE_BYTE (c);
4735       if (c < 0x80)
4736         continue;
4737       if (c >= 0xA1)
4738         {
               /* Lead byte of a two-byte code; validate the trail byte.  */
4739           ONE_MORE_BYTE (c);
               /* An invalid trail byte must mark the category as rejected,
                  like every other invalid sequence, so leave the loop
                  instead of returning directly.  */
4740           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4741             break;
4742           found = CATEGORY_MASK_BIG5;
4743         }
4744       else
4745         break;
4746     }
       /* The loop is left by `break' only on an invalid byte.  */
4747   detect_info->rejected |= CATEGORY_MASK_BIG5;
4748   return 0;
4749
       /* NOTE(review): presumably reached from ONE_MORE_BYTE when the
          source is exhausted (the macro is defined elsewhere).  */
4750  no_more_source:
       /* SRC has advanced past SRC_BASE, so the last character was begun
          but not completed; on the last block that is a rejection.  */
4751   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4752     {
4753       detect_info->rejected |= CATEGORY_MASK_BIG5;
4754       return 0;
4755     }
4756   detect_info->found |= found;
4757   return 1;
4758 }
4759
4760 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4761    Decode SJIS text from CODING->source (starting at CODING->consumed)
        into CODING->charbuf.  A byte that does not start a valid SJIS
        sequence is stored by itself through BYTE8_TO_CHAR and counted in
        CODING->errors.  On exit, update CODING->consumed,
        CODING->consumed_char and CODING->charbuf_used.  */
4762
4763 static void
4764 decode_coding_sjis (struct coding_system *coding)
4765 {
4766   const unsigned char *src = coding->source + coding->consumed;
4767   const unsigned char *src_end = coding->source + coding->src_bytes;
4768   const unsigned char *src_base;
4769   int *charbuf = coding->charbuf + coding->charbuf_used;
4770   /* We may produce one charset annotation in one loop and one more at
4771      the end.  */
4772   int *charbuf_end
4773     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4774   int consumed_chars = 0, consumed_chars_base;
4775   int multibytep = coding->src_multibyte;
4776   struct charset *charset_roman, *charset_kanji, *charset_kana;
4777   struct charset *charset_kanji2;
4778   Lisp_Object attrs, charset_list, val;
4779   int char_offset = coding->produced_char;
4780   int last_offset = char_offset;
4781   int last_id = charset_ascii;
4782   int eol_crlf =
4783     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4784   int byte_after_cr = -1;
4785
4786   CODING_GET_INFO (coding, attrs, charset_list);
4787
       /* The charset list holds, in order, the roman, kana and kanji
          charsets, optionally followed by a second kanji charset.  */
4788   val = charset_list;
4789   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4790   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4791   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4792   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4793
4794   while (1)
4795     {
4796       int c, c1;
4797       struct charset *charset;
4798
           /* Record the state at the start of this character; it is what
              gets reported as consumed if decoding stops here.  */
4799       src_base = src;
4800       consumed_chars_base = consumed_chars;
4801
4802       if (charbuf >= charbuf_end)
4803         {
               /* A byte read ahead after CR is still pending; back up so
                  that it is not reported as consumed.  */
4804           if (byte_after_cr >= 0)
4805             src_base--;
4806           break;
4807         }
4808
4809       if (byte_after_cr >= 0)
4810         c = byte_after_cr, byte_after_cr = -1;
4811       else
4812         ONE_MORE_BYTE (c);
4813       if (c < 0)
4814         goto invalid_code;
4815       if (c < 0x80)
4816         {
               /* With CRLF end-of-line, read the byte after CR ahead.
                  NOTE(review): presumably so that a CR is never consumed
                  without the byte that follows it -- confirm against
                  ONE_MORE_BYTE.  */
4817           if (eol_crlf && c == '\r')
4818             ONE_MORE_BYTE (byte_after_cr);
4819           charset = charset_roman;
4820         }
4821       else if (c == 0x80 || c == 0xA0)
4822         goto invalid_code;
4823       else if (c >= 0xA1 && c <= 0xDF)
4824         {
4825           /* SJIS -> JISX0201-Kana */
4826           c &= 0x7F;
4827           charset = charset_kana;
4828         }
4829       else if (c <= 0xEF)
4830         {
4831           /* SJIS -> JISX0208 */
4832           ONE_MORE_BYTE (c1);
4833           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4834             goto invalid_code;
4835           c = (c << 8) | c1;
4836           SJIS_TO_JIS (c);
4837           charset = charset_kanji;
4838         }
4839       else if (c <= 0xFC && charset_kanji2)
4840         {
4841           /* SJIS -> JISX0213-2 */
4842           ONE_MORE_BYTE (c1);
4843           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4844             goto invalid_code;
4845           c = (c << 8) | c1;
4846           SJIS_TO_JIS2 (c);
4847           charset = charset_kanji2;
4848         }
4849       else
4850         goto invalid_code;
           /* On a switch to a different non-ASCII charset, emit charset
              data for the run that just ended (unless that run was ASCII)
              and start a new run at the current offset.  */
4851       if (charset->id != charset_ascii
4852           && last_id != charset->id)
4853         {
4854           if (last_id != charset_ascii)
4855             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4856           last_id = charset->id;
4857           last_offset = char_offset;
4858         }
4859       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4860       *charbuf++ = c;
4861       char_offset++;
4862       continue;
4863
4864     invalid_code:
           /* Rewind to the start of the offending sequence and store just
              its first byte, so that decoding resumes at the next byte.  */
4865       src = src_base;
4866       consumed_chars = consumed_chars_base;
4867       ONE_MORE_BYTE (c);
4868       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4869       char_offset++;
4870       coding->errors++;
4871     }
4872
       /* Reached by falling out of the loop above; NOTE(review):
          presumably also the target of ONE_MORE_BYTE when the source is
          exhausted (the macro is defined elsewhere).  */
4873  no_more_source:
4874   if (last_id != charset_ascii)
4875     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4876   coding->consumed_char += consumed_chars_base;
4877   coding->consumed = src_base - coding->source;
4878   coding->charbuf_used = charbuf - coding->charbuf;
4879 }
4880
     /* Decode BIG5 text from CODING->source (starting at CODING->consumed)
        into CODING->charbuf.  A byte that does not start a valid BIG5
        sequence is stored by itself through BYTE8_TO_CHAR and counted in
        CODING->errors.  On exit, update CODING->consumed,
        CODING->consumed_char and CODING->charbuf_used.  */
4881 static void
4882 decode_coding_big5 (struct coding_system *coding)
4883 {
4884   const unsigned char *src = coding->source + coding->consumed;
4885   const unsigned char *src_end = coding->source + coding->src_bytes;
4886   const unsigned char *src_base;
4887   int *charbuf = coding->charbuf + coding->charbuf_used;
4888   /* We may produce one charset annotation in one loop and one more at
4889      the end.  */
4890   int *charbuf_end
4891     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
4892   int consumed_chars = 0, consumed_chars_base;
4893   int multibytep = coding->src_multibyte;
4894   struct charset *charset_roman, *charset_big5;
4895   Lisp_Object attrs, charset_list, val;
4896   int char_offset = coding->produced_char;
4897   int last_offset = char_offset;
4898   int last_id = charset_ascii;
4899   int eol_crlf =
4900     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4901   int byte_after_cr = -1;
4902
4903   CODING_GET_INFO (coding, attrs, charset_list);
       /* The charset list holds the roman charset followed by the Big5
          charset.  */
4904   val = charset_list;
4905   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4906   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4907
4908   while (1)
4909     {
4910       int c, c1;
4911       struct charset *charset;
4912
           /* Record the state at the start of this character; it is what
              gets reported as consumed if decoding stops here.  */
4913       src_base = src;
4914       consumed_chars_base = consumed_chars;
4915
4916       if (charbuf >= charbuf_end)
4917         {
               /* A byte read ahead after CR is still pending; back up so
                  that it is not reported as consumed.  */
4918           if (byte_after_cr >= 0)
4919             src_base--;
4920           break;
4921         }
4922
4923       if (byte_after_cr >= 0)
4924         c = byte_after_cr, byte_after_cr = -1;
4925       else
4926         ONE_MORE_BYTE (c);
4927
4928       if (c < 0)
4929         goto invalid_code;
4930       if (c < 0x80)
4931         {
               /* With CRLF end-of-line, read the byte after CR ahead.
                  NOTE(review): presumably so that a CR is never consumed
                  without the byte that follows it -- confirm against
                  ONE_MORE_BYTE.  */
4932           if (eol_crlf && c == '\r')
4933             ONE_MORE_BYTE (byte_after_cr);
4934           charset = charset_roman;
4935         }
4936       else
4937         {
4938           /* BIG5 -> Big5 */
4939           if (c < 0xA1 || c > 0xFE)
4940             goto invalid_code;
4941           ONE_MORE_BYTE (c1);
4942           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4943             goto invalid_code;
4944           c = c << 8 | c1;
4945           charset = charset_big5;
4946         }
           /* On a switch to a different non-ASCII charset, emit charset
              data for the run that just ended (unless that run was ASCII)
              and start a new run at the current offset.  */
4947       if (charset->id != charset_ascii
4948           && last_id != charset->id)
4949         {
4950           if (last_id != charset_ascii)
4951             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4952           last_id = charset->id;
4953           last_offset = char_offset;
4954         }
4955       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4956       *charbuf++ = c;
4957       char_offset++;
4958       continue;
4959
4960     invalid_code:
           /* Rewind to the start of the offending sequence and store just
              its first byte, so that decoding resumes at the next byte.  */
4961       src = src_base;
4962       consumed_chars = consumed_chars_base;
4963       ONE_MORE_BYTE (c);
4964       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4965       char_offset++;
4966       coding->errors++;
4967     }
4968
       /* Reached by falling out of the loop above; NOTE(review):
          presumably also the target of ONE_MORE_BYTE when the source is
          exhausted (the macro is defined elsewhere).  */
4969  no_more_source:
4970   if (last_id != charset_ascii)
4971     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4972   coding->consumed_char += consumed_chars_base;
4973   coding->consumed = src_base - coding->source;
4974   coding->charbuf_used = charbuf - coding->charbuf;
4975 }
4976
4977 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4978    Encode the characters in CODING->charbuf as SJIS text.  The
4979    charset list of the coding system supplies, in order, the roman,
4980    kana and kanji charsets, optionally followed by a second kanji
4981    charset.  ASCII characters (when the coding system is ASCII
4982    compatible) and raw 8-bit characters are emitted as single bytes;
4983    other characters are encoded according to their charset.
        Return 0 after recording CODING_RESULT_SUCCESS and updating
        CODING->produced and CODING->produced_char.  */
4984
4985 static int
4986 encode_coding_sjis (struct coding_system *coding)
4987 {
4988   int multibytep = coding->dst_multibyte;
4989   int *charbuf = coding->charbuf;
4990   int *charbuf_end = charbuf + coding->charbuf_used;
4991   unsigned char *dst = coding->destination + coding->produced;
4992   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4993   int safe_room = 4;
4994   int produced_chars = 0;
4995   Lisp_Object attrs, charset_list, val;
4996   int ascii_compatible;
4997   struct charset *charset_roman, *charset_kanji, *charset_kana;
4998   struct charset *charset_kanji2;
4999   int c;
5000
5001   CODING_GET_INFO (coding, attrs, charset_list);
       /* NOTE(review): charset_roman is assigned here but not referenced
          again in the visible body of this function.  */
5002   val = charset_list;
5003   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5004   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5005   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5006   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
5007
5008   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5009
5010   while (charbuf < charbuf_end)
5011     {
5012       ASSURE_DESTINATION (safe_room);
5013       c = *charbuf++;
5014       /* Now encode the character C.  */
5015       if (ASCII_CHAR_P (c) && ascii_compatible)
5016         EMIT_ONE_ASCII_BYTE (c);
5017       else if (CHAR_BYTE8_P (c))
5018         {
5019           c = CHAR_TO_BYTE8 (c);
5020           EMIT_ONE_BYTE (c);
5021         }
5022       else
5023         {
5024           unsigned code;
5025           struct charset *charset = char_charset (c, charset_list, &code);
5026
               /* char_charset returned no charset for C: substitute either
                  the inhibit-substitution marker (safe encoding mode) or
                  the coding system's default character.
                  NOTE(review): the result of the second char_charset call
                  is used below without a null check -- confirm that
                  default_char is always encodable.  */
5027           if (!charset)
5028             {
5029               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5030                 {
5031                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5032                   charset = CHARSET_FROM_ID (charset_ascii);
5033                 }
5034               else
5035                 {
5036                   c = coding->default_char;
5037                   charset = char_charset (c, charset_list, &code);
5038                 }
5039             }
5040           if (code == CHARSET_INVALID_CODE (charset))
5041             abort ();
5042           if (charset == charset_kanji)
5043             {
5044               int c1, c2;
5045               JIS_TO_SJIS (code);
5046               c1 = code >> 8, c2 = code & 0xFF;
5047               EMIT_TWO_BYTES (c1, c2);
5048             }
5049           else if (charset == charset_kana)
5050             EMIT_ONE_BYTE (code | 0x80);
5051           else if (charset_kanji2 && charset == charset_kanji2)
5052             {
5053               int c1, c2;
5054
                   /* Only codes whose high byte is one of the values
                      tested below are converted with JIS_TO_SJIS2; any
                      other code is emitted as a single 7-bit byte.  */
5055               c1 = code >> 8;
5056               if (c1 == 0x21 || (c1 >= 0x23 && c1 <= 0x25)
5057                   || c1 == 0x28
5058                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
5059                 {
5060                   JIS_TO_SJIS2 (code);
5061                   c1 = code >> 8, c2 = code & 0xFF;
5062                   EMIT_TWO_BYTES (c1, c2);
5063                 }
5064               else
5065                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
5066             }
5067           else
5068             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5069         }
5070     }
5071   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5072   coding->produced_char += produced_chars;
5073   coding->produced = dst - coding->destination;
5074   return 0;
5075 }
5076
     /* Encode the characters in CODING->charbuf as BIG5 text.  The
        charset list of the coding system supplies the roman charset
        followed by the Big5 charset.  ASCII characters (when the coding
        system is ASCII compatible) and raw 8-bit characters are emitted
        as single bytes; a Big5 character is emitted as the two bytes of
        its code; anything else is emitted as a single 7-bit byte.
        Return 0 after recording CODING_RESULT_SUCCESS and updating
        CODING->produced and CODING->produced_char.  */
5077 static int
5078 encode_coding_big5 (struct coding_system *coding)
5079 {
5080   int multibytep = coding->dst_multibyte;
5081   int *charbuf = coding->charbuf;
5082   int *charbuf_end = charbuf + coding->charbuf_used;
5083   unsigned char *dst = coding->destination + coding->produced;
5084   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5085   int safe_room = 4;
5086   int produced_chars = 0;
5087   Lisp_Object attrs, charset_list, val;
5088   int ascii_compatible;
5089   struct charset *charset_roman, *charset_big5;
5090   int c;
5091
5092   CODING_GET_INFO (coding, attrs, charset_list);
       /* NOTE(review): charset_roman is assigned here but not referenced
          again in the visible body of this function.  */
5093   val = charset_list;
5094   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
5095   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
5096   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5097
5098   while (charbuf < charbuf_end)
5099     {
5100       ASSURE_DESTINATION (safe_room);
5101       c = *charbuf++;
5102       /* Now encode the character C.  */
5103       if (ASCII_CHAR_P (c) && ascii_compatible)
5104         EMIT_ONE_ASCII_BYTE (c);
5105       else if (CHAR_BYTE8_P (c))
5106         {
5107           c = CHAR_TO_BYTE8 (c);
5108           EMIT_ONE_BYTE (c);
5109         }
5110       else
5111         {
5112           unsigned code;
5113           struct charset *charset = char_charset (c, charset_list, &code);
5114
               /* char_charset returned no charset for C: substitute either
                  the inhibit-substitution marker (safe encoding mode) or
                  the coding system's default character.
                  NOTE(review): the result of the second char_charset call
                  is used below without a null check -- confirm that
                  default_char is always encodable.  */
5115           if (! charset)
5116             {
5117               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5118                 {
5119                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5120                   charset = CHARSET_FROM_ID (charset_ascii);
5121                 }
5122               else
5123                 {
5124                   c = coding->default_char;
5125                   charset = char_charset (c, charset_list, &code);
5126                 }
5127             }
5128           if (code == CHARSET_INVALID_CODE (charset))
5129             abort ();
5130           if (charset == charset_big5)
5131             {
5132               int c1, c2;
5133
                   /* High byte first, then low byte.  */
5134               c1 = code >> 8, c2 = code & 0xFF;
5135               EMIT_TWO_BYTES (c1, c2);
5136             }
5137           else
5138             EMIT_ONE_ASCII_BYTE (code & 0x7F);
5139         }
5140     }
5141   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5142   coding->produced_char += produced_chars;
5143   coding->produced = dst - coding->destination;
5144   return 0;
5145 }
5146
5147 \f
5148 /*** 10. CCL handlers ***/
5149
5150 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5151    Check if a text is encoded in a coding system whose
5152    encoder/decoder are written as a CCL program.  If a byte that is
5153    not valid for that coding system is found, set CATEGORY_MASK_CCL
        in DETECT_INFO->rejected and return 0.  Otherwise, once the
        source is exhausted, OR into DETECT_INFO->found the mask
        CATEGORY_MASK_CCL if some byte had a validity value greater
        than 1, and return 1.  */
5154
5155 static int
5156 detect_coding_ccl (struct coding_system *coding,
5157                    struct coding_detection_info *detect_info)
5158 {
5159   const unsigned char *src = coding->source, *src_base;
5160   const unsigned char *src_end = coding->source + coding->src_bytes;
5161   int multibytep = coding->src_multibyte;
5162   int consumed_chars = 0;
5163   int found = 0;
5164   unsigned char *valids;
5165   int head_ascii = coding->head_ascii;
5166   Lisp_Object attrs;
5167
5168   detect_info->checked |= CATEGORY_MASK_CCL;
5169
       /* From here on CODING refers to the coding system registered for
          the CCL category, not to the caller's; the source pointers and
          head_ascii were taken from the caller's CODING above.  */
5170   coding = &coding_categories[coding_category_ccl];
5171   valids = CODING_CCL_VALIDS (coding);
5172   attrs = CODING_ID_ATTRS (coding->id);
5173   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5174     src += head_ascii;
5175
5176   while (1)
5177     {
5178       int c;
5179
5180       src_base = src;
5181       ONE_MORE_BYTE (c);
           /* VALIDS is indexed by byte value; a zero entry means the
              byte is not acceptable.  */
5182       if (c < 0 || ! valids[c])
5183         break;
5184       if ((valids[c] > 1))
5185         found = CATEGORY_MASK_CCL;
5186     }
5187   detect_info->rejected |= CATEGORY_MASK_CCL;
5188   return 0;
5189
       /* NOTE(review): presumably reached from ONE_MORE_BYTE when the
          source is exhausted (the macro is defined elsewhere).  */
5190  no_more_source:
5191   detect_info->found |= found;
5192   return 1;
5193 }
5194
     /* Decode the source of CODING by running its CCL program.  The
        source is handed to ccl_driver in chunks of at most 1024
        elements, and the output is written directly into
        CODING->charbuf.  The final CCL status is translated into a
        conversion result, and CODING->consumed, CODING->consumed_char
        and CODING->charbuf_used are updated.  */
5195 static void
5196 decode_coding_ccl (struct coding_system *coding)
5197 {
5198   const unsigned char *src = coding->source + coding->consumed;
5199   const unsigned char *src_end = coding->source + coding->src_bytes;
5200   int *charbuf = coding->charbuf + coding->charbuf_used;
5201   int *charbuf_end = coding->charbuf + coding->charbuf_size;
5202   int consumed_chars = 0;
5203   int multibytep = coding->src_multibyte;
5204   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5205   int source_charbuf[1024];
       /* One more slot than source_charbuf, to hold the byte offset
          just past the last character of a full chunk.  */
5206   int source_byteidx[1025];
5207   Lisp_Object attrs, charset_list;
5208
5209   CODING_GET_INFO (coding, attrs, charset_list);
5210
5211   while (1)
5212     {
5213       const unsigned char *p = src;
5214       int i = 0;
5215
5216       if (multibytep)
5217         {
               /* Multibyte source: pass characters, and remember for each
                  character index its byte offset from SRC.  */
5218           while (i < 1024 && p < src_end)
5219             {
5220               source_byteidx[i] = p - src;
5221               source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
5222             }
5223           source_byteidx[i] = p - src;
5224         }
5225       else
5226         while (i < 1024 && p < src_end)
5227           source_charbuf[i++] = *p++;
5228
5229       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
5230         ccl->last_block = 1;
5231       ccl_driver (ccl, source_charbuf, charbuf, i, charbuf_end - charbuf,
5232                   charset_list);
5233       charbuf += ccl->produced;
           /* Advance SRC by the bytes that correspond to the elements
              the CCL program consumed.  */
5234       if (multibytep)
5235         src += source_byteidx[ccl->consumed];
5236       else
5237         src += ccl->consumed;
5238       consumed_chars += ccl->consumed;
           /* Go round again only when this chunk did not reach the end
              of the source and the program stopped for lack of input.  */
5239       if (p == src_end || ccl->status != CCL_STAT_SUSPEND_BY_SRC)
5240         break;
5241     }
5242
5243   switch (ccl->status)
5244     {
5245     case CCL_STAT_SUSPEND_BY_SRC:
5246       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5247       break;
5248     case CCL_STAT_SUSPEND_BY_DST:
5249       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5250       break;
5251     case CCL_STAT_QUIT:
5252     case CCL_STAT_INVALID_CMD:
5253       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5254       break;
5255     default:
5256       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5257       break;
5258     }
5259   coding->consumed_char += consumed_chars;
5260   coding->consumed = src - coding->source;
5261   coding->charbuf_used = charbuf - coding->charbuf;
5262 }
5263
     /* Encode the characters in CODING->charbuf by running the CCL
        program of CODING.  Each ccl_driver call writes at most 1024
        elements into a local buffer, whose low 8 bits are then copied
        to the destination.  The final CCL status is translated into a
        conversion result, CODING->produced and CODING->produced_char
        are updated, and 0 is returned.  */
5264 static int
5265 encode_coding_ccl (struct coding_system *coding)
5266 {
5267   struct ccl_program *ccl = &coding->spec.ccl->ccl;
5268   int multibytep = coding->dst_multibyte;
5269   int *charbuf = coding->charbuf;
5270   int *charbuf_end = charbuf + coding->charbuf_used;
5271   unsigned char *dst = coding->destination + coding->produced;
5272   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5273   int destination_charbuf[1024];
5274   int i, produced_chars = 0;
5275   Lisp_Object attrs, charset_list;
5276
5277   CODING_GET_INFO (coding, attrs, charset_list);
       /* Tell the CCL program this is the last block only when all source
          characters have been consumed.  */
5278   if (coding->consumed_char == coding->src_chars
5279       && coding->mode & CODING_MODE_LAST_BLOCK)
5280     ccl->last_block = 1;
5281
5282   while (charbuf < charbuf_end)
5283     {
5284       ccl_driver (ccl, charbuf, destination_charbuf,
5285                   charbuf_end - charbuf, 1024, charset_list);
5286       if (multibytep)
5287         {
               /* NOTE(review): twice the produced count is reserved,
                  presumably because EMIT_ONE_BYTE may write two bytes
                  per element into a multibyte destination -- confirm
                  against the macro.  */
5288           ASSURE_DESTINATION (ccl->produced * 2);
5289           for (i = 0; i < ccl->produced; i++)
5290             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
5291         }
5292       else
5293         {
5294           ASSURE_DESTINATION (ccl->produced);
5295           for (i = 0; i < ccl->produced; i++)
5296             *dst++ = destination_charbuf[i] & 0xFF;
5297           produced_chars += ccl->produced;
5298         }
5299       charbuf += ccl->consumed;
5300       if (ccl->status == CCL_STAT_QUIT
5301           || ccl->status == CCL_STAT_INVALID_CMD)
5302         break;
5303     }
5304
5305   switch (ccl->status)
5306     {
5307     case CCL_STAT_SUSPEND_BY_SRC:
5308       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5309       break;
5310     case CCL_STAT_SUSPEND_BY_DST:
5311       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
5312       break;
5313     case CCL_STAT_QUIT:
5314     case CCL_STAT_INVALID_CMD:
5315       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
5316       break;
5317     default:
5318       record_conversion_result (coding, CODING_RESULT_SUCCESS);
5319       break;
5320     }
5321
5322   coding->produced_char += produced_chars;
5323   coding->produced = dst - coding->destination;
5324   return 0;
5325 }
5326
5327
5328 \f
5329 /*** 10, 11. no-conversion handlers ***/
5330
5331 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Raw text is not converted here: set CODING->chars_at_source and
        report the whole source as consumed.  The one exception is a
        source that ends in CR while CRLF end-of-line conversion is in
        effect: that CR is left unconsumed and
        CODING_RESULT_INSUFFICIENT_SRC is recorded.  */
5332
5333 static void
5334 decode_coding_raw_text (struct coding_system *coding)
5335 {
5336   int eol_crlf =
5337     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5338
5339   coding->chars_at_source = 1;
5340   coding->consumed_char = coding->src_chars;
5341   coding->consumed = coding->src_bytes;
       /* Look at the last source byte only when there is one; with an
          empty source, index SRC_BYTES - 1 would be out of bounds.  */
5342   if (eol_crlf
           && coding->src_bytes > 0
           && coding->source[coding->src_bytes - 1] == '\r')
5343     {
5344       coding->consumed_char--;
5345       coding->consumed--;
5346       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
5347     }
5348   else
5349     record_conversion_result (coding, CODING_RESULT_SUCCESS);
5350 }
5351
     /* Copy the elements of CODING->charbuf to the destination without
        charset conversion.  There are four cases, chosen by whether the
        destination and the source are multibyte.  Return 0 after
        recording CODING_RESULT_SUCCESS and updating CODING->produced
        and CODING->produced_char.  */
5352 static int
5353 encode_coding_raw_text (struct coding_system *coding)
5354 {
5355   int multibytep = coding->dst_multibyte;
5356   int *charbuf = coding->charbuf;
5357   int *charbuf_end = coding->charbuf + coding->charbuf_used;
5358   unsigned char *dst = coding->destination + coding->produced;
5359   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5360   int produced_chars = 0;
5361   int c;
5362
5363   if (multibytep)
5364     {
5365       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
5366
5367       if (coding->src_multibyte)
5368         while (charbuf < charbuf_end)
5369           {
5370             ASSURE_DESTINATION (safe_room);
5371             c = *charbuf++;
5372             if (ASCII_CHAR_P (c))
5373               EMIT_ONE_ASCII_BYTE (c);
5374             else if (CHAR_BYTE8_P (c))
5375               {
5376                 c = CHAR_TO_BYTE8 (c);
5377                 EMIT_ONE_BYTE (c);
5378               }
5379             else
5380               {
                     /* Expand C into its multibyte form in STR, then emit
                        each byte of that form individually.  */
5381                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
5382
5383                 CHAR_STRING_ADVANCE (c, p1);
5384                 while (p0 < p1)
5385                   {
5386                     EMIT_ONE_BYTE (*p0);
5387                     p0++;
5388                   }
5389               }
5390           }
5391       else
5392         while (charbuf < charbuf_end)
5393           {
5394             ASSURE_DESTINATION (safe_room);
5395             c = *charbuf++;
5396             EMIT_ONE_BYTE (c);
5397           }
5398     }
5399   else
5400     {
5401       if (coding->src_multibyte)
5402         {
5403           int safe_room = MAX_MULTIBYTE_LENGTH;
5404
5405           while (charbuf < charbuf_end)
5406             {
5407               ASSURE_DESTINATION (safe_room);
5408               c = *charbuf++;
5409               if (ASCII_CHAR_P (c))
5410                 *dst++ = c;
5411               else if (CHAR_BYTE8_P (c))
5412                 *dst++ = CHAR_TO_BYTE8 (c);
5413               else
5414                 CHAR_STRING_ADVANCE (c, dst);
5415             }
5416         }
5417       else
5418         {
               /* Unibyte to unibyte: one byte per element.  */
5419           ASSURE_DESTINATION (charbuf_end - charbuf);
5420           while (charbuf < charbuf_end && dst < dst_end)
5421             *dst++ = *charbuf++;
5422         }
           /* In both unibyte-destination cases DST is advanced directly,
              so the count is the number of bytes written by this call.  */
5423       produced_chars = dst - (coding->destination + coding->produced);
5424     }
5425   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5426   coding->produced_char += produced_chars;
5427   coding->produced = dst - coding->destination;
5428   return 0;
5429 }
5430
5431 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
5432    Check if a text is encoded in a charset-based coding system.  If it
5433    is, return 1, else return 0.  */
5434
5435 static int
5436 detect_coding_charset (struct coding_system *coding,
5437                        struct coding_detection_info *detect_info)
5438 {
5439   const unsigned char *src = coding->source, *src_base;
5440   const unsigned char *src_end = coding->source + coding->src_bytes;
5441   int multibytep = coding->src_multibyte;
5442   int consumed_chars = 0;
5443   Lisp_Object attrs, valids, name;
5444   int found = 0;
5445   int head_ascii = coding->head_ascii;
5446   int check_latin_extra = 0;
5447
5448   detect_info->checked |= CATEGORY_MASK_CHARSET;
5449
5450   coding = &coding_categories[coding_category_charset];
5451   attrs = CODING_ID_ATTRS (coding->id);
5452   valids = AREF (attrs, coding_attr_charset_valids);
5453   name = CODING_ID_NAME (coding->id);
5454   if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5455                "iso-8859-", sizeof ("iso-8859-") - 1) == 0
5456       || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
5457                   "iso-latin-", sizeof ("iso-latin-") - 1) == 0)
5458     check_latin_extra = 1;
5459
5460   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
5461     src += head_ascii;
5462
5463   while (1)
5464     {
5465       int c;
5466       Lisp_Object val;
5467       struct charset *charset;
5468       int dim, idx;
5469
5470       src_base = src;
5471       ONE_MORE_BYTE (c);
5472       if (c < 0)
5473         continue;
5474       val = AREF (valids, c);
5475       if (NILP (val))
5476         break;
5477       if (c >= 0x80)
5478         {
5479           if (c < 0xA0
5480               && check_latin_extra
5481               && (!VECTORP (Vlatin_extra_code_table)
5482                   || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])))
5483             break;
5484           found = CATEGORY_MASK_CHARSET;
5485         }
5486       if (INTEGERP (val))
5487         {
5488           charset = CHARSET_FROM_ID (XFASTINT (val));
5489           dim = CHARSET_DIMENSION (charset);
5490           for (idx = 1; idx < dim; idx++)
5491             {
5492               if (src == src_end)
5493                 goto too_short;
5494               ONE_MORE_BYTE (c);
5495               if (c < charset->code_space[(dim - 1 - idx) * 2]
5496                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
5497                 break;
5498             }
5499           if (idx < dim)
5500             break;
5501         }
5502       else
5503         {
5504           idx = 1;
5505           for (; CONSP (val); val = XCDR (val))
5506             {
5507               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5508               dim = CHARSET_DIMENSION (charset);
5509               while (idx < dim)
5510                 {
5511                   if (src == src_end)
5512                     goto too_short;
5513                   ONE_MORE_BYTE (c);
5514                   if (c < charset->code_space[(dim - 1 - idx) * 4]
5515                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
5516                     break;
5517                   idx++;
5518                 }
5519               if (idx == dim)
5520                 {
5521                   val = Qnil;
5522                   break;
5523                 }
5524             }
5525           if (CONSP (val))
5526             break;
5527         }
5528     }
5529  too_short:
5530   detect_info->rejected |= CATEGORY_MASK_CHARSET;
5531   return 0;
5532
5533  no_more_source:
5534   detect_info->found |= found;
5535   return 1;
5536 }
5537
5538 static void
5539 decode_coding_charset (struct coding_system *coding)
5540 {
5541   const unsigned char *src = coding->source + coding->consumed;
5542   const unsigned char *src_end = coding->source + coding->src_bytes;
5543   const unsigned char *src_base;
5544   int *charbuf = coding->charbuf + coding->charbuf_used;
5545   /* We may produce one charset annocation in one loop and one more at
5546      the end.  */
5547   int *charbuf_end
5548     = coding->charbuf + coding->charbuf_size - (MAX_ANNOTATION_LENGTH * 2);
5549   int consumed_chars = 0, consumed_chars_base;
5550   int multibytep = coding->src_multibyte;
5551   Lisp_Object attrs, charset_list, valids;
5552   int char_offset = coding->produced_char;
5553   int last_offset = char_offset;
5554   int last_id = charset_ascii;
5555   int eol_crlf =
5556     !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
5557   int byte_after_cr = -1;
5558
5559   CODING_GET_INFO (coding, attrs, charset_list);
5560   valids = AREF (attrs, coding_attr_charset_valids);
5561
5562   while (1)
5563     {
5564       int c;
5565       Lisp_Object val;
5566       struct charset *charset;
5567       int dim;
5568       int len = 1;
5569       unsigned code;
5570
5571       src_base = src;
5572       consumed_chars_base = consumed_chars;
5573
5574       if (charbuf >= charbuf_end)
5575         {
5576           if (byte_after_cr >= 0)
5577             src_base--;
5578           break;
5579         }
5580
5581       if (byte_after_cr >= 0)
5582         {
5583           c = byte_after_cr;
5584           byte_after_cr = -1;
5585         }
5586       else
5587         {
5588           ONE_MORE_BYTE (c);
5589           if (eol_crlf && c == '\r')
5590             ONE_MORE_BYTE (byte_after_cr);
5591         }
5592       if (c < 0)
5593         goto invalid_code;
5594       code = c;
5595
5596       val = AREF (valids, c);
5597       if (! INTEGERP (val) && ! CONSP (val))
5598         goto invalid_code;
5599       if (INTEGERP (val))
5600         {
5601           charset = CHARSET_FROM_ID (XFASTINT (val));
5602           dim = CHARSET_DIMENSION (charset);
5603           while (len < dim)
5604             {
5605               ONE_MORE_BYTE (c);
5606               code = (code << 8) | c;
5607               len++;
5608             }
5609           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5610                               charset, code, c);
5611         }
5612       else
5613         {
5614           /* VAL is a list of charset IDs.  It is assured that the
5615              list is sorted by charset dimensions (smaller one
5616              comes first).  */
5617           while (CONSP (val))
5618             {
5619               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5620               dim = CHARSET_DIMENSION (charset);
5621               while (len < dim)
5622                 {
5623                   ONE_MORE_BYTE (c);
5624                   code = (code << 8) | c;
5625                   len++;
5626                 }
5627               CODING_DECODE_CHAR (coding, src, src_base,
5628                                   src_end, charset, code, c);
5629               if (c >= 0)
5630                 break;
5631               val = XCDR (val);
5632             }
5633         }
5634       if (c < 0)
5635         goto invalid_code;
5636       if (charset->id != charset_ascii
5637           && last_id != charset->id)
5638         {
5639           if (last_id != charset_ascii)
5640             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5641           last_id = charset->id;
5642           last_offset = char_offset;
5643         }
5644
5645       *charbuf++ = c;
5646       char_offset++;
5647       continue;
5648
5649     invalid_code:
5650       src = src_base;
5651       consumed_chars = consumed_chars_base;
5652       ONE_MORE_BYTE (c);
5653       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5654       char_offset++;
5655       coding->errors++;
5656     }
5657
5658  no_more_source:
5659   if (last_id != charset_ascii)
5660     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5661   coding->consumed_char += consumed_chars_base;
5662   coding->consumed = src_base - coding->source;
5663   coding->charbuf_used = charbuf - coding->charbuf;
5664 }
5665
5666 static int
5667 encode_coding_charset (struct coding_system *coding)
5668 {
5669   int multibytep = coding->dst_multibyte;
5670   int *charbuf = coding->charbuf;
5671   int *charbuf_end = charbuf + coding->charbuf_used;
5672   unsigned char *dst = coding->destination + coding->produced;
5673   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5674   int safe_room = MAX_MULTIBYTE_LENGTH;
5675   int produced_chars = 0;
5676   Lisp_Object attrs, charset_list;
5677   int ascii_compatible;
5678   int c;
5679
5680   CODING_GET_INFO (coding, attrs, charset_list);
5681   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5682
5683   while (charbuf < charbuf_end)
5684     {
5685       struct charset *charset;
5686       unsigned code;
5687
5688       ASSURE_DESTINATION (safe_room);
5689       c = *charbuf++;
5690       if (ascii_compatible && ASCII_CHAR_P (c))
5691         EMIT_ONE_ASCII_BYTE (c);
5692       else if (CHAR_BYTE8_P (c))
5693         {
5694           c = CHAR_TO_BYTE8 (c);
5695           EMIT_ONE_BYTE (c);
5696         }
5697       else
5698         {
5699           charset = char_charset (c, charset_list, &code);
5700           if (charset)
5701             {
5702               if (CHARSET_DIMENSION (charset) == 1)
5703                 EMIT_ONE_BYTE (code);
5704               else if (CHARSET_DIMENSION (charset) == 2)
5705                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5706               else if (CHARSET_DIMENSION (charset) == 3)
5707                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5708               else
5709                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5710                                  (code >> 8) & 0xFF, code & 0xFF);
5711             }
5712           else
5713             {
5714               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5715                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5716               else
5717                 c = coding->default_char;
5718               EMIT_ONE_BYTE (c);
5719             }
5720         }
5721     }
5722
5723   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5724   coding->produced_char += produced_chars;
5725   coding->produced = dst - coding->destination;
5726   return 0;
5727 }
5728
5729 \f
5730 /*** 10. C library functions ***/
5731
5732 /* Setup coding context CODING from information about CODING_SYSTEM.
5733    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5734    CODING_SYSTEM is invalid, signal an error.  */
5735
5736 void
5737 setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
5738 {
5739   Lisp_Object attrs;
5740   Lisp_Object eol_type;
5741   Lisp_Object coding_type;
5742   Lisp_Object val;
5743
5744   if (NILP (coding_system))
5745     coding_system = Qundecided;
5746
5747   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5748
5749   attrs = CODING_ID_ATTRS (coding->id);
5750   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
5751
5752   coding->mode = 0;
5753   coding->head_ascii = -1;
5754   if (VECTORP (eol_type))
5755     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5756                             | CODING_REQUIRE_DETECTION_MASK);
5757   else if (! EQ (eol_type, Qunix))
5758     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5759                             | CODING_REQUIRE_ENCODING_MASK);
5760   else
5761     coding->common_flags = 0;
5762   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5763     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5764   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5765     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5766   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5767     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5768
5769   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5770   coding->max_charset_id = SCHARS (val) - 1;
5771   coding->safe_charsets = SDATA (val);
5772   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5773   coding->carryover_bytes = 0;
5774
5775   coding_type = CODING_ATTR_TYPE (attrs);
5776   if (EQ (coding_type, Qundecided))
5777     {
5778       coding->detector = NULL;
5779       coding->decoder = decode_coding_raw_text;
5780       coding->encoder = encode_coding_raw_text;
5781       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5782     }
5783   else if (EQ (coding_type, Qiso_2022))
5784     {
5785       int i;
5786       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5787
5788       /* Invoke graphic register 0 to plane 0.  */
5789       CODING_ISO_INVOCATION (coding, 0) = 0;
5790       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5791       CODING_ISO_INVOCATION (coding, 1)
5792         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5793       /* Setup the initial status of designation.  */
5794       for (i = 0; i < 4; i++)
5795         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5796       /* Not single shifting initially.  */
5797       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5798       /* Beginning of buffer should also be regarded as bol. */
5799       CODING_ISO_BOL (coding) = 1;
5800       coding->detector = detect_coding_iso_2022;
5801       coding->decoder = decode_coding_iso_2022;
5802       coding->encoder = encode_coding_iso_2022;
5803       if (flags & CODING_ISO_FLAG_SAFE)
5804         coding->mode |= CODING_MODE_SAFE_ENCODING;
5805       coding->common_flags
5806         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5807             | CODING_REQUIRE_FLUSHING_MASK);
5808       if (flags & CODING_ISO_FLAG_COMPOSITION)
5809         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5810       if (flags & CODING_ISO_FLAG_DESIGNATION)
5811         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5812       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5813         {
5814           setup_iso_safe_charsets (attrs);
5815           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5816           coding->max_charset_id = SCHARS (val) - 1;
5817           coding->safe_charsets = SDATA (val);
5818         }
5819       CODING_ISO_FLAGS (coding) = flags;
5820       CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5821       CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5822       CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5823       CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5824     }
5825   else if (EQ (coding_type, Qcharset))
5826     {
5827       coding->detector = detect_coding_charset;
5828       coding->decoder = decode_coding_charset;
5829       coding->encoder = encode_coding_charset;
5830       coding->common_flags
5831         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5832     }
5833   else if (EQ (coding_type, Qutf_8))
5834     {
5835       val = AREF (attrs, coding_attr_utf_bom);
5836       CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
5837                                    : EQ (val, Qt) ? utf_with_bom
5838                                    : utf_without_bom);
5839       coding->detector = detect_coding_utf_8;
5840       coding->decoder = decode_coding_utf_8;
5841       coding->encoder = encode_coding_utf_8;
5842       coding->common_flags
5843         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5844       if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
5845         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5846     }
5847   else if (EQ (coding_type, Qutf_16))
5848     {
5849       val = AREF (attrs, coding_attr_utf_bom);
5850       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
5851                                     : EQ (val, Qt) ? utf_with_bom
5852                                     : utf_without_bom);
5853       val = AREF (attrs, coding_attr_utf_16_endian);
5854       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5855                                        : utf_16_little_endian);
5856       CODING_UTF_16_SURROGATE (coding) = 0;
5857       coding->detector = detect_coding_utf_16;
5858       coding->decoder = decode_coding_utf_16;
5859       coding->encoder = encode_coding_utf_16;
5860       coding->common_flags
5861         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5862       if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
5863         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5864     }
5865   else if (EQ (coding_type, Qccl))
5866     {
5867       coding->detector = detect_coding_ccl;
5868       coding->decoder = decode_coding_ccl;
5869       coding->encoder = encode_coding_ccl;
5870       coding->common_flags
5871         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5872             | CODING_REQUIRE_FLUSHING_MASK);
5873     }
5874   else if (EQ (coding_type, Qemacs_mule))
5875     {
5876       coding->detector = detect_coding_emacs_mule;
5877       coding->decoder = decode_coding_emacs_mule;
5878       coding->encoder = encode_coding_emacs_mule;
5879       coding->common_flags
5880         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5881       coding->spec.emacs_mule.full_support = 1;
5882       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5883           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5884         {
5885           Lisp_Object tail, safe_charsets;
5886           int max_charset_id = 0;
5887
5888           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5889                tail = XCDR (tail))
5890             if (max_charset_id < XFASTINT (XCAR (tail)))
5891               max_charset_id = XFASTINT (XCAR (tail));
5892           safe_charsets = make_uninit_string (max_charset_id + 1);
5893           memset (SDATA (safe_charsets), 255, max_charset_id + 1);
5894           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5895                tail = XCDR (tail))
5896             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5897           coding->max_charset_id = max_charset_id;
5898           coding->safe_charsets = SDATA (safe_charsets);
5899           coding->spec.emacs_mule.full_support = 1;
5900         }
5901       coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5902       coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5903     }
5904   else if (EQ (coding_type, Qshift_jis))
5905     {
5906       coding->detector = detect_coding_sjis;
5907       coding->decoder = decode_coding_sjis;
5908       coding->encoder = encode_coding_sjis;
5909       coding->common_flags
5910         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5911     }
5912   else if (EQ (coding_type, Qbig5))
5913     {
5914       coding->detector = detect_coding_big5;
5915       coding->decoder = decode_coding_big5;
5916       coding->encoder = encode_coding_big5;
5917       coding->common_flags
5918         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5919     }
5920   else                          /* EQ (coding_type, Qraw_text) */
5921     {
5922       coding->detector = NULL;
5923       coding->decoder = decode_coding_raw_text;
5924       coding->encoder = encode_coding_raw_text;
5925       if (! EQ (eol_type, Qunix))
5926         {
5927           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5928           if (! VECTORP (eol_type))
5929             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5930         }
5931
5932     }
5933
5934   return;
5935 }
5936
5937 /* Return a list of charsets supported by CODING.  */
5938
5939 Lisp_Object
5940 coding_charset_list (struct coding_system *coding)
5941 {
5942   Lisp_Object attrs, charset_list;
5943
5944   CODING_GET_INFO (coding, attrs, charset_list);
5945   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5946     {
5947       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5948
5949       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5950         charset_list = Viso_2022_charset_list;
5951     }
5952   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5953     {
5954       charset_list = Vemacs_mule_charset_list;
5955     }
5956   return charset_list;
5957 }
5958
5959
5960 /* Return a list of charsets supported by CODING-SYSTEM.  */
5961
5962 Lisp_Object
5963 coding_system_charset_list (Lisp_Object coding_system)
5964 {
5965   int id;
5966   Lisp_Object attrs, charset_list;
5967
5968   CHECK_CODING_SYSTEM_GET_ID (coding_system, id);
5969   attrs = CODING_ID_ATTRS (id);
5970
5971   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5972     {
5973       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5974
5975       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5976         charset_list = Viso_2022_charset_list;
5977       else
5978         charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5979     }
5980   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5981     {
5982       charset_list = Vemacs_mule_charset_list;
5983     }
5984   else
5985     {
5986       charset_list = CODING_ATTR_CHARSET_LIST (attrs);
5987     }
5988   return charset_list;
5989 }
5990
5991
5992 /* Return raw-text or one of its subsidiaries that has the same
5993    eol_type as CODING-SYSTEM.  */
5994
5995 Lisp_Object
5996 raw_text_coding_system (Lisp_Object coding_system)
5997 {
5998   Lisp_Object spec, attrs;
5999   Lisp_Object eol_type, raw_text_eol_type;
6000
6001   if (NILP (coding_system))
6002     return Qraw_text;
6003   spec = CODING_SYSTEM_SPEC (coding_system);
6004   attrs = AREF (spec, 0);
6005
6006   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
6007     return coding_system;
6008
6009   eol_type = AREF (spec, 2);
6010   if (VECTORP (eol_type))
6011     return Qraw_text;
6012   spec = CODING_SYSTEM_SPEC (Qraw_text);
6013   raw_text_eol_type = AREF (spec, 2);
6014   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
6015           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
6016           : AREF (raw_text_eol_type, 2));
6017 }
6018
6019
6020 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
6021    does, return one of the subsidiary that has the same eol-spec as
6022    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
6023    inherit end-of-line format from the system's setting
6024    (system_eol_type).  */
6025
6026 Lisp_Object
6027 coding_inherit_eol_type (Lisp_Object coding_system, Lisp_Object parent)
6028 {
6029   Lisp_Object spec, eol_type;
6030
6031   if (NILP (coding_system))
6032     coding_system = Qraw_text;
6033   spec = CODING_SYSTEM_SPEC (coding_system);
6034   eol_type = AREF (spec, 2);
6035   if (VECTORP (eol_type))
6036     {
6037       Lisp_Object parent_eol_type;
6038
6039       if (! NILP (parent))
6040         {
6041           Lisp_Object parent_spec;
6042
6043           parent_spec = CODING_SYSTEM_SPEC (parent);
6044           parent_eol_type = AREF (parent_spec, 2);
6045         }
6046       else
6047         parent_eol_type = system_eol_type;
6048       if (EQ (parent_eol_type, Qunix))
6049         coding_system = AREF (eol_type, 0);
6050       else if (EQ (parent_eol_type, Qdos))
6051         coding_system = AREF (eol_type, 1);
6052       else if (EQ (parent_eol_type, Qmac))
6053         coding_system = AREF (eol_type, 2);
6054     }
6055   return coding_system;
6056 }
6057
6058 /* Emacs has a mechanism to automatically detect a coding system if it
6059    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
6060    it's impossible to distinguish some coding systems accurately
6061    because they use the same range of codes.  So, at first, coding
6062    systems are grouped into the categories listed below:
6063
6064    o coding-category-emacs-mule
6065
6066         The category for a coding system which has the same code range
6067         as Emacs' internal format.  Assigned the coding-system (Lisp
6068         symbol) `emacs-mule' by default.
6069
6070    o coding-category-sjis
6071
6072         The category for a coding system which has the same code range
6073         as SJIS.  Assigned the coding-system (Lisp
6074         symbol) `japanese-shift-jis' by default.
6075
6076    o coding-category-iso-7
6077
6078         The category for a coding system which has the same code range
6079         as ISO2022 of 7-bit environment.  This doesn't use any locking
6080         shift and single shift functions.  This can encode/decode all
6081         charsets.  Assigned the coding-system (Lisp symbol)
6082         `iso-2022-7bit' by default.
6083
6084    o coding-category-iso-7-tight
6085
6086         Same as coding-category-iso-7 except that this can
6087         encode/decode only the specified charsets.
6088
6089    o coding-category-iso-8-1
6090
6091         The category for a coding system which has the same code range
6092         as ISO2022 of 8-bit environment and graphic plane 1 used only
6093         for DIMENSION1 charset.  This doesn't use any locking shift
6094         and single shift functions.  Assigned the coding-system (Lisp
6095         symbol) `iso-latin-1' by default.
6096
6097    o coding-category-iso-8-2
6098
6099         The category for a coding system which has the same code range
6100         as ISO2022 of 8-bit environment and graphic plane 1 used only
6101         for DIMENSION2 charset.  This doesn't use any locking shift
6102         and single shift functions.  Assigned the coding-system (Lisp
6103         symbol) `japanese-iso-8bit' by default.
6104
6105    o coding-category-iso-7-else
6106
6107         The category for a coding system which has the same code range
6108         as ISO2022 of 7-bit environment but uses locking shift or
6109         single shift functions.  Assigned the coding-system (Lisp
6110         symbol) `iso-2022-7bit-lock' by default.
6111
6112    o coding-category-iso-8-else
6113
6114         The category for a coding system which has the same code range
6115         as ISO2022 of 8-bit environment but uses locking shift or
6116         single shift functions.  Assigned the coding-system (Lisp
6117         symbol) `iso-2022-8bit-ss2' by default.
6118
6119    o coding-category-big5
6120
6121         The category for a coding system which has the same code range
6122         as BIG5.  Assigned the coding-system (Lisp symbol)
6123         `cn-big5' by default.
6124
6125    o coding-category-utf-8
6126
6127         The category for a coding system which has the same code range
6128         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
6129         symbol) `utf-8' by default.
6130
6131    o coding-category-utf-16-be
6132
6133         The category for a coding system in which a text has an
6134         Unicode signature (cf. Unicode Standard) in the order of BIG
6135         endian at the head.  Assigned the coding-system (Lisp symbol)
6136         `utf-16-be' by default.
6137
6138    o coding-category-utf-16-le
6139
6140         The category for a coding system in which a text has an
6141         Unicode signature (cf. Unicode Standard) in the order of
6142         LITTLE endian at the head.  Assigned the coding-system (Lisp
6143         symbol) `utf-16-le' by default.
6144
6145    o coding-category-ccl
6146
6147         The category for a coding system of which encoder/decoder is
6148         written in CCL programs.  The default value is nil, i.e., no
6149         coding system is assigned.
6150
6151    o coding-category-binary
6152
6153         The category for a coding system not categorized in any of the
6154         above.  Assigned the coding-system (Lisp symbol)
6155         `no-conversion' by default.
6156
6157    Each of them is a Lisp symbol whose value is an actual
6158    `coding-system' (also a Lisp symbol) assigned by a user.  What
6159    Emacs actually does is detect the category of a coding system and
6160    then use the `coding-system' assigned to that category.  If Emacs
6161    can't narrow the result down to a single category, it selects the
6162    category of the highest priority.  Priorities of categories are
6163    also specified by a user in a Lisp variable `coding-category-list'.
6164
6165 */
6166
6167 #define EOL_SEEN_NONE   0
6168 #define EOL_SEEN_LF     1
6169 #define EOL_SEEN_CR     2
6170 #define EOL_SEEN_CRLF   4
6171
6172 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
6173    SOURCE is encoded.  If CATEGORY is one of
6174    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
6175    two-byte, else they are encoded by one-byte.
6176
6177    Return one of EOL_SEEN_XXX.  */
6178
6179 #define MAX_EOL_CHECK_COUNT 3
6180
6181 static int
6182 detect_eol (const unsigned char *source, EMACS_INT src_bytes,
6183             enum coding_category category)
6184 {
6185   const unsigned char *src = source, *src_end = src + src_bytes;
6186   unsigned char c;
6187   int total  = 0;
6188   int eol_seen = EOL_SEEN_NONE;
6189
6190   if ((1 << category) & CATEGORY_MASK_UTF_16)
6191     {
6192       int msb, lsb;
6193
6194       msb = category == (coding_category_utf_16_le
6195                          | coding_category_utf_16_le_nosig);
6196       lsb = 1 - msb;
6197
6198       while (src + 1 < src_end)
6199         {
6200           c = src[lsb];
6201           if (src[msb] == 0 && (c == '\n' || c == '\r'))
6202             {
6203               int this_eol;
6204
6205               if (c == '\n')
6206                 this_eol = EOL_SEEN_LF;
6207               else if (src + 3 >= src_end
6208                        || src[msb + 2] != 0
6209                        || src[lsb + 2] != '\n')
6210                 this_eol = EOL_SEEN_CR;
6211               else
6212                 {
6213                   this_eol = EOL_SEEN_CRLF;
6214                   src += 2;
6215                 }
6216
6217               if (eol_seen == EOL_SEEN_NONE)
6218                 /* This is the first end-of-line.  */
6219                 eol_seen = this_eol;
6220               else if (eol_seen != this_eol)
6221                 {
6222                   /* The found type is different from what found before.
6223                      Allow for stray ^M characters in DOS EOL files.  */
6224                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6225                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6226                     eol_seen = EOL_SEEN_CRLF;
6227                   else
6228                     {
6229                       eol_seen = EOL_SEEN_LF;
6230                       break;
6231                     }
6232                 }
6233               if (++total == MAX_EOL_CHECK_COUNT)
6234                 break;
6235             }
6236           src += 2;
6237         }
6238     }
6239   else
6240     {
6241       while (src < src_end)
6242         {
6243           c = *src++;
6244           if (c == '\n' || c == '\r')
6245             {
6246               int this_eol;
6247
6248               if (c == '\n')
6249                 this_eol = EOL_SEEN_LF;
6250               else if (src >= src_end || *src != '\n')
6251                 this_eol = EOL_SEEN_CR;
6252               else
6253                 this_eol = EOL_SEEN_CRLF, src++;
6254
6255               if (eol_seen == EOL_SEEN_NONE)
6256                 /* This is the first end-of-line.  */
6257                 eol_seen = this_eol;
6258               else if (eol_seen != this_eol)
6259                 {
6260                   /* The found type is different from what found before.
6261                      Allow for stray ^M characters in DOS EOL files.  */
6262                   if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
6263                       || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
6264                     eol_seen = EOL_SEEN_CRLF;
6265                   else
6266                     {
6267                       eol_seen = EOL_SEEN_LF;
6268                       break;
6269                     }
6270                 }
6271               if (++total == MAX_EOL_CHECK_COUNT)
6272                 break;
6273             }
6274         }
6275     }
6276   return eol_seen;
6277 }
6278
6279 /* Narrow CODING to one concrete end-of-line variant chosen by EOL_SEEN.

        The eol-type of CODING->id (read with CODING_ID_EOL_TYPE) is
        indexed with AREF: element 0 is used when EOL_SEEN has the
        EOL_SEEN_LF bit, element 1 for EOL_SEEN_CRLF and element 2 for
        EOL_SEEN_CR.  The bits are tested in that order, so LF takes
        precedence when several bits are set.  The selected element is
        mapped through CODING_SYSTEM_ID and stored in CODING->id.

        Return Qunix, Qdos or Qmac respectively.  If none of the three
        bits is set, CODING is not modified and the eol-type value that
        was read on entry is returned as is.

        NOTE(review): the AREF calls imply that the eol-type is a vector
        on entry.  The call in decode_eol is guarded by VECTORP; any
        other caller should be checked.  */
6280 static Lisp_Object
6281 adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
6282 {
6283   Lisp_Object eol_type;
6284
6285   eol_type = CODING_ID_EOL_TYPE (coding->id);
6286   if (eol_seen & EOL_SEEN_LF)
6287     {
6288       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
6289       eol_type = Qunix;
6290     }
6291   else if (eol_seen & EOL_SEEN_CRLF)
6292     {
6293       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
6294       eol_type = Qdos;
6295     }
6296   else if (eol_seen & EOL_SEEN_CR)
6297     {
6298       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
6299       eol_type = Qmac;
6300     }
6301   return eol_type;
6302 }
6303
6304 /* Detect how a text specified in CODING is encoded.  If a coding
6305    system is detected, update fields of CODING by the detected coding
6306    system.

        Three cases are handled, selected by the attributes of CODING->id:

        - The type is Qundecided: the source is pre-scanned and the
          detectors of the categories listed in coding_priorities are
          consulted; on success CODING is re-initialized with
          setup_coding_system.
        - The category is coding_category_utf_8_auto or
          coding_category_utf_16_auto: the attribute coding_attr_utf_bom
          must be a cons; its car or cdr is selected according to what
          the corresponding detector reports.

        For any other coding system only the consumed/produced counters
        and CODING->head_ascii are reset.  CODING->mode is saved on entry
        and restored before returning.  */
6307
6308 void
6309 detect_coding (struct coding_system *coding)
6310 {
6311   const unsigned char *src, *src_end;
6312   int saved_mode = coding->mode;
6313
6314   coding->consumed = coding->consumed_char = 0;
6315   coding->produced = coding->produced_char = 0;
6316   coding_set_source (coding);
6317
6318   src_end = coding->source + coding->src_bytes;
6319   coding->head_ascii = 0;
6320
6321   /* If we have not yet decided the text encoding type, detect it
6322      now.  */
6323   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
6324     {
6325       int c, i;
6326       struct coding_detection_info detect_info;
6327       int null_byte_found = 0, eight_bit_found = 0;
6328
6329       detect_info.checked = detect_info.found = detect_info.rejected = 0;
           /* Pre-scan the source.  Record whether an 8-bit byte and/or a
              null byte occurs, and count in CODING->head_ascii the bytes
              seen before the first 8-bit byte.  The scan stops early once
              both an 8-bit byte and a null byte have been seen, or when
              the ISO-2022 detector below returns nonzero.  */
6330       for (src = coding->source; src < src_end; src++)
6331         {
6332           c = *src;
6333           if (c & 0x80)
6334             {
6335               eight_bit_found = 1;
6336               if (null_byte_found)
6337                 break;
6338             }
6339           else if (c < 0x20)
6340             {
                   /* An ISO-2022 control code triggers the ISO-2022
                      detector, but only while detect_info.checked is
                      still zero (presumably the detector sets bits there
                      so that it runs at most once -- confirm).  */
6341               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
6342                   && ! inhibit_iso_escape_detection
6343                   && ! detect_info.checked)
6344                 {
6345                   if (detect_coding_iso_2022 (coding, &detect_info))
6346                     {
6347                       /* We have scanned the whole data.  */
6348                       if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
6349                         {
6350                           /* We didn't find an 8-bit code.  We may
6351                              have found a null-byte, but it's very
6352                              rare that a binary file conforms to
6353                              ISO-2022.  */
6354                           src = src_end;
6355                           coding->head_ascii = src - coding->source;
6356                         }
                           /* Everything outside the ISO escape categories
                              is now ruled out.  */
6357                       detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
6358                       break;
6359                     }
6360                 }
6361               else if (! c && !inhibit_null_byte_detection)
6362                 {
6363                   null_byte_found = 1;
6364                   if (eight_bit_found)
6365                     break;
6366                 }
6367               if (! eight_bit_found)
6368                 coding->head_ascii++;
6369             }
6370           else if (! eight_bit_found)
6371             coding->head_ascii++;
6372         }
6373
           /* Go on only if the pre-scan saw a null byte or an 8-bit byte,
              did not count every source byte in head_ascii, or a
              detector has already flagged some category as found.  */
6374       if (null_byte_found || eight_bit_found
6375           || coding->head_ascii < coding->src_bytes
6376           || detect_info.found)
6377         {
6378           enum coding_category category;
6379           struct coding_system *this;
6380
               /* In both loops below CATEGORY and THIS are assigned on
                  every iteration, so after a `break' they describe the
                  category that matched; I == coding_category_raw_text
                  afterwards means that no category matched.  */
6381           if (coding->head_ascii == coding->src_bytes)
6382             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
6383             for (i = 0; i < coding_category_raw_text; i++)
6384               {
6385                 category = coding_priorities[i];
6386                 this = coding_categories + category;
6387                 if (detect_info.found & (1 << category))
6388                   break;
6389               }
6390           else
6391             {
6392               if (null_byte_found)
6393                 {
                       /* With a null byte present, mark every category
                          outside CATEGORY_MASK_UTF_16 as already checked
                          and rejected, so only those detectors can still
                          run.  */
6394                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
6395                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
6396                 }
6397               for (i = 0; i < coding_category_raw_text; i++)
6398                 {
6399                   category = coding_priorities[i];
6400                   this = coding_categories + category;
6401                   if (this->id < 0)
6402                     {
6403                       /* No coding system of this category is defined.  */
6404                       detect_info.rejected |= (1 << category);
6405                     }
6406                   else if (category >= coding_category_raw_text)
6407                     continue;
6408                   else if (detect_info.checked & (1 << category))
6409                     {
                           /* Already examined by an earlier detector
                              call; just use the recorded result.  */
6410                       if (detect_info.found & (1 << category))
6411                         break;
6412                     }
6413                   else if ((*(this->detector)) (coding, &detect_info)
6414                            && detect_info.found & (1 << category))
6415                     {
                           /* NOTE(review): CATEGORY is changed here but
                              THIS is not, and the setup_coding_system
                              call below uses THIS->id only, so this
                              reassignment has no visible effect in this
                              function -- confirm that this is intended.  */
6416                       if (category == coding_category_utf_16_auto)
6417                         {
6418                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6419                             category = coding_category_utf_16_le;
6420                           else
6421                             category = coding_category_utf_16_be;
6422                         }
6423                       break;
6424                     }
6425                 }
6426             }
6427
               /* Choose the coding system.  In order: the category that
                  matched above; Qno_conversion if a null byte was seen;
                  Qraw_text if every category in CATEGORY_MASK_ANY was
                  rejected; otherwise, if anything was rejected, the
                  first category in priority order that was not.  If none
                  of these applies, CODING is left as it is.  */
6428           if (i < coding_category_raw_text)
6429             setup_coding_system (CODING_ID_NAME (this->id), coding);
6430           else if (null_byte_found)
6431             setup_coding_system (Qno_conversion, coding);
6432           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
6433                    == CATEGORY_MASK_ANY)
6434             setup_coding_system (Qraw_text, coding);
6435           else if (detect_info.rejected)
6436             for (i = 0; i < coding_category_raw_text; i++)
6437               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6438                 {
6439                   this = coding_categories + coding_priorities[i];
6440                   setup_coding_system (CODING_ID_NAME (this->id), coding);
6441                   break;
6442                 }
6443         }
6444     }
6445   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6446            == coding_category_utf_8_auto)
6447     {
6448       Lisp_Object coding_systems;
6449       struct coding_detection_info detect_info;
6450
6451       coding_systems
6452         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
           /* NOTE(review): detect_info.checked is not initialized in
              this branch, unlike in the Qundecided branch above; confirm
              that detect_coding_utf_8 does not depend on its initial
              value.  */
6453       detect_info.found = detect_info.rejected = 0;
6454       coding->head_ascii = 0;
           /* The car of CODING_SYSTEMS is used when the detector found
              CATEGORY_MASK_UTF_8_SIG, the cdr otherwise.  */
6455       if (CONSP (coding_systems)
6456           && detect_coding_utf_8 (coding, &detect_info))
6457         {
6458           if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
6459             setup_coding_system (XCAR (coding_systems), coding);
6460           else
6461             setup_coding_system (XCDR (coding_systems), coding);
6462         }
6463     }
6464   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
6465            == coding_category_utf_16_auto)
6466     {
6467       Lisp_Object coding_systems;
6468       struct coding_detection_info detect_info;
6469
6470       coding_systems
6471         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
           /* NOTE(review): as in the UTF-8 branch, detect_info.checked
              is left uninitialized here -- confirm that this is safe.  */
6472       detect_info.found = detect_info.rejected = 0;
6473       coding->head_ascii = 0;
           /* The car of CODING_SYSTEMS is used for little endian, the
              cdr for big endian; if the detector reports neither,
              CODING is left unchanged.  */
6474       if (CONSP (coding_systems)
6475           && detect_coding_utf_16 (coding, &detect_info))
6476         {
6477           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6478             setup_coding_system (XCAR (coding_systems), coding);
6479           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6480             setup_coding_system (XCDR (coding_systems), coding);
6481         }
6482     }
6483   coding->mode = saved_mode;
6484 }
6485
6486 /* Convert the end-of-line sequences of the text produced by CODING
        to LF, in place.

        The text is the CODING->produced bytes starting at
        CODING->destination when CODING->dst_object is nil, and at
        BYTE_POS_ADDR (CODING->dst_pos_byte) otherwise.  Nothing is done
        when the eol-type of CODING->id is Qunix or when
        inhibit_eol_conversion is nonzero.

        If the eol-type is still a vector, the text is first scanned to
        classify its line endings and adjust_coding_eol_type is called
        to settle the choice (which also updates CODING->id).  For Qmac
        each CR is overwritten with LF.  For Qdos CR bytes are deleted,
        and CODING->produced and CODING->produced_char are reduced by the
        number of deletions.  */
6487 static void
6488 decode_eol (struct coding_system *coding)
6489 {
6490   Lisp_Object eol_type;
6491   unsigned char *p, *pbeg, *pend;
6492
6493   eol_type = CODING_ID_EOL_TYPE (coding->id);
6494   if (EQ (eol_type, Qunix) || inhibit_eol_conversion)
6495     return;
6496
6497   if (NILP (coding->dst_object))
6498     pbeg = coding->destination;
6499   else
6500     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
6501   pend = pbeg + coding->produced;
6502
6503   if (VECTORP (eol_type))
6504     {
6505       int eol_seen = EOL_SEEN_NONE;
6506
           /* Collect, as a bit mask, every kind of line ending that
              occurs in the produced text.  A CR immediately followed by
              LF counts once, as CRLF.  */
6507       for (p = pbeg; p < pend; p++)
6508         {
6509           if (*p == '\n')
6510             eol_seen |= EOL_SEEN_LF;
6511           else if (*p == '\r')
6512             {
6513               if (p + 1 < pend && *(p + 1) == '\n')
6514                 {
6515                   eol_seen |= EOL_SEEN_CRLF;
6516                   p++;
6517                 }
6518               else
6519                 eol_seen |= EOL_SEEN_CR;
6520             }
6521         }
6522       /* Handle DOS-style EOLs in a file with stray ^M characters.  */
6523       if ((eol_seen & EOL_SEEN_CRLF) != 0
6524           && (eol_seen & EOL_SEEN_CR) != 0
6525           && (eol_seen & EOL_SEEN_LF) == 0)
6526         eol_seen = EOL_SEEN_CRLF;
           /* Any other mixture of line endings is treated as LF.  */
6527       else if (eol_seen != EOL_SEEN_NONE
6528           && eol_seen != EOL_SEEN_LF
6529           && eol_seen != EOL_SEEN_CRLF
6530           && eol_seen != EOL_SEEN_CR)
6531         eol_seen = EOL_SEEN_LF;
6532       if (eol_seen != EOL_SEEN_NONE)
6533         eol_type = adjust_coding_eol_type (coding, eol_seen);
6534     }
6535
6536   if (EQ (eol_type, Qmac))
6537     {
6538       for (p = pbeg; p < pend; p++)
6539         if (*p == '\r')
6540           *p = '\n';
6541     }
6542   else if (EQ (eol_type, Qdos))
6543     {
           /* Number of CR bytes deleted.  */
6544       int n = 0;
6545
6546       if (NILP (coding->dst_object))
6547         {
6548           /* Start deleting '\r' from the tail to minimize the memory
6549              movement.  */
               /* NOTE(review): this loop deletes every CR it visits
                  without checking that an LF follows, while the buffer
                  branch below requires CR followed by LF; the last byte
                  is never visited.  Also, when fewer than two bytes were
                  produced, PEND - 2 lies before PBEG.  Confirm that both
                  points are intended or harmless.  */
6550           for (p = pend - 2; p >= pbeg; p--)
6551             if (*p == '\r')
6552               {
                     /* Shift the bytes after the CR down by one; PEND is
                        decremented after the length is computed.  */
6553                 memmove (p, p + 1, pend-- - p - 1);
6554                 n++;
6555               }
6556         }
6557       else
6558         {
6559           int pos_byte = coding->dst_pos_byte;
6560           int pos = coding->dst_pos;
               /* Stop one character short of the end so that P[1] below
                  is still inside the produced text.  */
6561           int pos_end = pos + coding->produced_char - 1;
6562
6563           while (pos < pos_end)
6564             {
6565               p = BYTE_POS_ADDR (pos_byte);
6566               if (*p == '\r' && p[1] == '\n')
6567                 {
6568                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
6569                   n++;
6570                   pos_end--;
6571                 }
                   /* NOTE(review): P was fetched before del_range_2 and
                      is dereferenced again below when dst_multibyte is
                      set -- confirm that the address is still valid
                      after the deletion.  */
6572               pos++;
6573               if (coding->dst_multibyte)
6574                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
6575               else
6576                 pos_byte++;
6577             }
6578         }
6579       coding->produced -= n;
6580       coding->produced_char -= n;
6581     }
6582 }
6583
6584
6585 /* Return a translation table (or list of them) from coding system
6586    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
6587    decoding (ENCODEP is zero).

        Return Qnil if Venable_character_translation is nil.  If ATTRS
        specifies no table, the standard table for the chosen direction
        is returned.  Otherwise a symbol is replaced by its
        Qtranslation_table property (likewise for symbols inside a
        list), and, if the standard table is a char-table, it is
        appended so that the result is a list ending with it.

        If MAX_LOOKUP is not NULL, *MAX_LOOKUP is set to 0 when
        translation is disabled; otherwise to the largest natural number
        found in extra slot 1 of the char-tables in the result that have
        more than one extra slot, but never less than 1.  */
6588
6589 static Lisp_Object
6590 get_translation_table (Lisp_Object attrs, int encodep, int *max_lookup)
6591 {
6592   Lisp_Object standard, translation_table;
6593   Lisp_Object val;
6594
6595   if (NILP (Venable_character_translation))
6596     {
6597       if (max_lookup)
6598         *max_lookup = 0;
6599       return Qnil;
6600     }
6601   if (encodep)
6602     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
6603       standard = Vstandard_translation_table_for_encode;
6604   else
6605     translation_table = CODING_ATTR_DECODE_TBL (attrs),
6606       standard = Vstandard_translation_table_for_decode;
6607   if (NILP (translation_table))
6608     translation_table = standard;
6609   else
6610     {
6611       if (SYMBOLP (translation_table))
6612         translation_table = Fget (translation_table, Qtranslation_table);
6613       else if (CONSP (translation_table))
6614         {
               /* Work on a copy of the list, so that the XSETCAR below
                  and the nconc2 further down do not modify the list
                  stored in ATTRS.  */
6615           translation_table = Fcopy_sequence (translation_table);
6616           for (val = translation_table; CONSP (val); val = XCDR (val))
6617             if (SYMBOLP (XCAR (val)))
6618               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
6619         }
6620       if (CHAR_TABLE_P (standard))
6621         {
6622           if (CONSP (translation_table))
6623             translation_table = nconc2 (translation_table,
6624                                         Fcons (standard, Qnil));
6625           else
6626             translation_table = Fcons (translation_table,
6627                                        Fcons (standard, Qnil));
6628         }
6629     }
6630
6631   if (max_lookup)
6632     {
6633       *max_lookup = 1;
6634       if (CHAR_TABLE_P (translation_table)
6635           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
6636         {
6637           val = XCHAR_TABLE (translation_table)->extras[1];
6638           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6639             *max_lookup = XFASTINT (val);
6640         }
6641       else if (CONSP (translation_table))
6642         {
               /* This VAL shadows the function-level VAL.  */
6643           Lisp_Object tail, val;
6644
6645           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
6646             if (CHAR_TABLE_P (XCAR (tail))
6647                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
6648               {
6649                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
6650                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
6651                   *max_lookup = XFASTINT (val);
6652               }
6653         }
6654     }
6655   return translation_table;
6656 }
6657 /* Look up character C in TABLE, which is a char-table or a list of
        char-tables, and store the outcome in C and TRANS.

        When an entry is itself a character, C is replaced by that
        character and TRANS is set to Qnil.  Any other entry is left in
        TRANS.  With a list, the elements that are char-tables are tried
        in order, each lookup using the current (possibly already
        replaced) value of C; the walk stops at the first entry that is
        neither a character nor nil.  If TABLE is neither a char-table
        nor a cons, TRANS is Qnil and C is unchanged.

        C and TRANS are assigned to, so they must be lvalues.  All
        arguments are expanded more than once and without parentheses,
        so they should be plain variables without side effects.  */
6658 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
6659   do {                                                          \
6660     trans = Qnil;                                               \
6661     if (CHAR_TABLE_P (table))                                   \
6662       {                                                         \
6663         trans = CHAR_TABLE_REF (table, c);                      \
6664         if (CHARACTERP (trans))                                 \
6665           c = XFASTINT (trans), trans = Qnil;                   \
6666       }                                                         \
6667     else if (CONSP (table))                                     \
6668       {                                                         \
6669         Lisp_Object tail;                                       \
6670                                                                 \
6671         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
6672           if (CHAR_TABLE_P (XCAR (tail)))                       \
6673             {                                                   \
6674               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
6675               if (CHARACTERP (trans))                           \
6676                 c = XFASTINT (trans), trans = Qnil;             \
6677               else if (! NILP (trans))                          \
6678                 break;                                          \
6679             }                                                   \
6680       }                                                         \
6681   } while (0)
6682
6683
6684 /* Return a translation of character(s) at BUF according to TRANS.
6685    TRANS is TO-CHAR or ((FROM . TO) ...) where
6686    FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6687    The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6688    translation is found, and Qnil if not found.
6689    If BUF is too short to look up characters in FROM, return Qt.

        BUF_END points just past the last character available at BUF.
        The entries of TRANS are tried in order and the first whose FROM
        matches the characters at BUF is returned.  Qt is returned as
        soon as some entry has matched every available character but
        needs more of them, even if a later entry could have matched
        completely.  */
6690
6691 static Lisp_Object
6692 get_translation (Lisp_Object trans, int *buf, int *buf_end)
6693 {
6694
       /* A plain TO-CHAR needs no matching.  */
6695   if (INTEGERP (trans))
6696     return trans;
6697   for (; CONSP (trans); trans = XCDR (trans))
6698     {
6699       Lisp_Object val = XCAR (trans);
6700       Lisp_Object from = XCAR (val);
6701       int len = ASIZE (from);
6702       int i;
6703
           /* Compare FROM with the characters at BUF one by one.  */
6704       for (i = 0; i < len; i++)
6705         {
6706           if (buf + i == buf_end)
6707             return Qt;
6708           if (XINT (AREF (from, i)) != buf[i])
6709             break;
6710         }
6711       if (i == len)
6712         return val;
6713     }
6714   return Qnil;
6715 }
6716
6717 /* Store the characters decoded so far into the destination of CODING
        and add what was written to CODING->produced (bytes) and
        CODING->produced_char (characters).

        If CODING->chars_at_source is zero, the input is the first
        CODING->charbuf_used elements of CODING->charbuf.  A non-negative
        element is a character; it is translated through
        TRANSLATION_TABLE and written out.  A negative element -N starts
        an annotation of N elements, which is skipped here.

        Otherwise the input is the CODING->consumed bytes at
        CODING->source.  They are copied unchanged when
        CODING->src_multibyte equals CODING->dst_multibyte, and otherwise
        passed through the ONE_MORE_BYTE or EMIT_ONE_BYTE macro, both of
        which are defined elsewhere.

        LAST_BLOCK nonzero means that no more input will follow: a
        translation whose lookup ran out of characters (get_translation
        returned Qt) is then not waited for.

        The destination is enlarged with alloc_destination when needed.
        If CODING->dst_object is a buffer and at least one character was
        produced, insert_from_gap is called for the new text.

        Return the number of elements at the end of CODING->charbuf that
        were left unprocessed; this is always 0 in the chars_at_source
        case.  */
6718 static int
6719 produce_chars (struct coding_system *coding, Lisp_Object translation_table,
6720                int last_block)
6721 {
6722   unsigned char *dst = coding->destination + coding->produced;
6723   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6724   EMACS_INT produced;
6725   EMACS_INT produced_chars = 0;
6726   int carryover = 0;
6727
6728   if (! coding->chars_at_source)
6729     {
6730       /* Source characters are in coding->charbuf.  */
6731       int *buf = coding->charbuf;
6732       int *buf_end = buf + coding->charbuf_used;
6733
           /* When source and destination are the same object, the limit
              of the writable area is derived from the source position
              instead of from dst_bytes.  */
6734       if (EQ (coding->src_object, coding->dst_object))
6735         {
6736           coding_set_source (coding);
6737           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6738         }
6739
6740       while (buf < buf_end)
6741         {
6742           int c = *buf, i;
6743
6744           if (c >= 0)
6745             {
                   /* FROM_NCHARS characters of input are replaced by
                      TO_NCHARS characters of output; both stay 1 unless
                      a multi-character translation applies.  */
6746               int from_nchars = 1, to_nchars = 1;
6747               Lisp_Object trans = Qnil;
6748
6749               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6750               if (! NILP (trans))
6751                 {
6752                   trans = get_translation (trans, buf, buf_end);
6753                   if (INTEGERP (trans))
6754                     c = XINT (trans);
6755                   else if (CONSP (trans))
6756                     {
6757                       from_nchars = ASIZE (XCAR (trans));
6758                       trans = XCDR (trans);
6759                       if (INTEGERP (trans))
6760                         c = XINT (trans);
6761                       else
6762                         {
6763                           to_nchars = ASIZE (trans);
6764                           c = XINT (AREF (trans, 0));
6765                         }
6766                     }
                       /* More characters are needed to decide and more
                          input may come: stop here and let the rest be
                          reported as carryover.  */
6767                   else if (EQ (trans, Qt) && ! last_block)
6768                     break;
6769                 }
6770
                   /* Make sure there is room for TO_NCHARS characters of
                      maximum length.  */
6771               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6772                 {
6773                   dst = alloc_destination (coding,
6774                                            buf_end - buf
6775                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6776                                            dst);
6777                   if (EQ (coding->src_object, coding->dst_object))
6778                     {
6779                       coding_set_source (coding);
6780                       dst_end = (((unsigned char *) coding->source)
6781                                  + coding->consumed);
6782                     }
6783                   else
6784                     dst_end = coding->destination + coding->dst_bytes;
6785                 }
6786
                   /* C already holds the first output character; the
                      following ones are fetched from the TO vector.  */
6787               for (i = 0; i < to_nchars; i++)
6788                 {
6789                   if (i > 0)
6790                     c = XINT (AREF (trans, i));
6791                   if (coding->dst_multibyte
6792                       || ! CHAR_BYTE8_P (c))
6793                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6794                   else
6795                     *dst++ = CHAR_TO_BYTE8 (c);
6796                 }
6797               produced_chars += to_nchars;
6798               buf += from_nchars;
6799             }
6800           else
6801             /* This is an annotation datum.  (-C) is the length.  */
6802             buf += -c;
6803         }
6804       carryover = buf_end - buf;
6805     }
6806   else
6807     {
6808       /* Source characters are at coding->source.  */
6809       const unsigned char *src = coding->source;
6810       const unsigned char *src_end = src + coding->consumed;
6811
           /* NOTE(review): SRC_END is computed from coding->consumed
              here but from coding->src_bytes after each reallocation
              below -- confirm that the two are equal in this mode.  */
6812       if (EQ (coding->dst_object, coding->src_object))
6813         dst_end = (unsigned char *) src;
6814       if (coding->src_multibyte != coding->dst_multibyte)
6815         {
6816           if (coding->src_multibyte)
6817             {
                   /* MULTIBYTEP, CONSUMED_CHARS and the label
                      no_more_source are not referenced directly in this
                      block; presumably ONE_MORE_BYTE, defined elsewhere,
                      uses them and jumps to the label at the end of the
                      input -- confirm against its definition.  */
6818               int multibytep = 1;
6819               EMACS_INT consumed_chars = 0;
6820
6821               while (1)
6822                 {
6823                   const unsigned char *src_base = src;
6824                   int c;
6825
6826                   ONE_MORE_BYTE (c);
6827                   if (dst == dst_end)
6828                     {
                           /* In the same-object case the source pointer
                              has advanced, so first try the enlarged
                              limit before allocating.  */
6829                       if (EQ (coding->src_object, coding->dst_object))
6830                         dst_end = (unsigned char *) src;
6831                       if (dst == dst_end)
6832                         {
                               /* Keep the source position as an offset
                                  and recompute the pointers after
                                  alloc_destination and
                                  coding_set_source.  */
6833                           EMACS_INT offset = src - coding->source;
6834
6835                           dst = alloc_destination (coding, src_end - src + 1,
6836                                                    dst);
6837                           dst_end = coding->destination + coding->dst_bytes;
6838                           coding_set_source (coding);
6839                           src = coding->source + offset;
6840                           src_end = coding->source + coding->src_bytes;
6841                           if (EQ (coding->src_object, coding->dst_object))
6842                             dst_end = (unsigned char *) src;
6843                         }
6844                     }
6845                   *dst++ = c;
6846                   produced_chars++;
6847                 }
6848             no_more_source:
6849               ;
6850             }
6851           else
6852             while (src < src_end)
6853               {
                     /* MULTIBYTEP is presumably consumed by
                        EMIT_ONE_BYTE, which is defined elsewhere --
                        confirm.  */
6854                 int multibytep = 1;
6855                 int c = *src++;
6856
                     /* Ask for room for two bytes per source byte.  */
6857                 if (dst >= dst_end - 1)
6858                   {
6859                     if (EQ (coding->src_object, coding->dst_object))
6860                       dst_end = (unsigned char *) src;
6861                     if (dst >= dst_end - 1)
6862                       {
6863                         EMACS_INT offset = src - coding->source;
6864                         EMACS_INT more_bytes;
6865
6866                         if (EQ (coding->src_object, coding->dst_object))
6867                           more_bytes = ((src_end - src) / 2) + 2;
6868                         else
6869                           more_bytes = src_end - src + 2;
6870                         dst = alloc_destination (coding, more_bytes, dst);
6871                         dst_end = coding->destination + coding->dst_bytes;
6872                         coding_set_source (coding);
6873                         src = coding->source + offset;
6874                         src_end = coding->source + coding->src_bytes;
6875                         if (EQ (coding->src_object, coding->dst_object))
6876                           dst_end = (unsigned char *) src;
6877                       }
6878                   }
6879                 EMIT_ONE_BYTE (c);
6880               }
6881         }
6882       else
6883         {
               /* Same representation on both sides: a plain byte copy.
                  Enlarge the destination first if the source is larger
                  than the current destination size.  */
6884           if (!EQ (coding->src_object, coding->dst_object))
6885             {
6886               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6887
6888               if (require > 0)
6889                 {
6890                   EMACS_INT offset = src - coding->source;
6891
6892                   dst = alloc_destination (coding, require, dst);
6893                   coding_set_source (coding);
6894                   src = coding->source + offset;
6895                   src_end = coding->source + coding->src_bytes;
6896                 }
6897             }
6898           produced_chars = coding->consumed_char;
6899           while (src < src_end)
6900             *dst++ = *src++;
6901         }
6902     }
6903
       /* Bytes written by this call.  */
6904   produced = dst - (coding->destination + coding->produced);
6905   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6906     insert_from_gap (produced_chars, produced);
6907   coding->produced += produced;
6908   coding->produced_char += produced_chars;
6909   return carryover;
6910 }
6911
6912 /* Compose text in CODING->object according to the annotation data at
6913    CHARBUF.  CHARBUF is an array:
6914      [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]

        POS is the position where the composed text starts; the
        composition covers the range from POS to POS + NCHARS and is
        made by calling compose_text on CODING->dst_object.  For
        COMPOSITION_RELATIVE no components are passed.  Otherwise the
        components are collected from the elements that follow the
        first MAX_ANNOTATION_LENGTH elements, up to the first -1 or the
        computed length, and passed as a string when they are all
        characters, or as a vector when rules are interleaved.
6915  */
6916
6917 static INLINE void
6918 produce_composition (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6919 {
6920   int len;
6921   EMACS_INT to;
6922   enum composition_method method;
6923   Lisp_Object components;
6924
6925   len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6926   to = pos + charbuf[2];
6927   method = (enum composition_method) (charbuf[4]);
6928
6929   if (method == COMPOSITION_RELATIVE)
6930     components = Qnil;
6931   else
6932     {
           /* NOTE(review): the loop below does not check J against the
              size of ARGS; presumably the code that built the annotation
              limits the number of components -- confirm.  */
6933       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6934       int i, j;
6935
6936       if (method == COMPOSITION_WITH_RULE)
6937         len = charbuf[2] * 3 - 2;
6938       charbuf += MAX_ANNOTATION_LENGTH;
6939       /* charbuf = [ CHAR ... CHAR ] or [ CHAR -2 RULE ... CHAR ] */
6940       for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6941         {
6942           if (charbuf[i] >= 0)
6943             args[j] = make_number (charbuf[i]);
6944           else
6945             {
                   /* Skip the negative marker; the element after it is
                      stored modulo 0x100.  */
6946               i++;
6947               args[j] = make_number (charbuf[i] % 0x100);
6948             }
6949         }
           /* I == J only if no marker was skipped, i.e. every component
              is a character.  */
6950       components = (i == j ? Fstring (j, args) : Fvector (j, args));
6951     }
6952   compose_text (pos, to, components, Qnil, coding->dst_object);
6953 }
6954
6955
6956 /* Put `charset' property on text in CODING->object according to
6957    the annotation data at CHARBUF.  CHARBUF is an array:
6958      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]

        The property is put on the NCHARS characters that end at POS,
        i.e. on the range from POS - NCHARS to POS, in
        CODING->dst_object.  Its value is the name of the charset
        obtained with CHARSET_FROM_ID (CHARSET-ID).
6959  */
6960
6961 static INLINE void
6962 produce_charset (struct coding_system *coding, int *charbuf, EMACS_INT pos)
6963 {
6964   EMACS_INT from = pos - charbuf[2];
6965   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6966
6967   Fput_text_property (make_number (from), make_number (pos),
6968                       Qcharset, CHARSET_NAME (charset),
6969                       coding->dst_object);
6970 }
6971
6972 /* CHARBUF_SIZE is the number of int elements first requested for the
        conversion work area.

        ALLOC_CONVERSION_WORK_AREA allocates CODING->charbuf with alloca,
        so the area belongs to the stack frame of the function in which
        the macro is expanded.  It starts with CHARBUF_SIZE elements and
        halves the request, while it stays above 1024, as long as alloca
        yields NULL.  On success CODING->charbuf_size is set to the
        number of elements obtained.  On failure it records
        CODING_RESULT_INSUFFICIENT_MEM and executes
        `return coding->result', so the macro can only be used in a
        function whose return type accepts that value.

        CODING is expanded several times and without parentheses; pass a
        plain pointer variable.

        NOTE(review): alloca is not normally documented to report
        failure by returning NULL, so the retry loop and the failure
        path may never run -- confirm for the supported platforms.  */
6973 #define CHARBUF_SIZE 0x4000
6974
6975 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
6976   do {                                                                  \
6977     int size = CHARBUF_SIZE;                                            \
6978                                                                         \
6979     coding->charbuf = NULL;                                             \
6980     while (size > 1024)                                                 \
6981       {                                                                 \
6982         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
6983         if (coding->charbuf)                                            \
6984           break;                                                        \
6985         size >>= 1;                                                     \
6986       }                                                                 \
6987     if (! coding->charbuf)                                              \
6988       {                                                                 \
6989         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
6990         return coding->result;                                          \
6991       }                                                                 \
6992     coding->charbuf_size = size;                                        \
6993   } while (0)
6994
6995 /* Apply the annotations recorded in CODING->charbuf to the text in
        CODING->dst_object.  POS is the position that corresponds to the
        first element of the buffer.

        The first CODING->charbuf_used elements are walked in order.  A
        non-negative element is a character and advances POS by one.  A
        negative element -LEN starts an annotation of LEN elements; if
        LEN is greater than 2, its second element selects the handler:
        produce_composition for CODING_ANNOTATE_COMPOSITION_MASK and
        produce_charset for CODING_ANNOTATE_CHARSET_MASK.  Other
        annotations are skipped.  Nothing is done when
        CODING->dst_object is nil.  */
6996 static void
6997 produce_annotation (struct coding_system *coding, EMACS_INT pos)
6998 {
6999   int *charbuf = coding->charbuf;
7000   int *charbuf_end = charbuf + coding->charbuf_used;
7001
7002   if (NILP (coding->dst_object))
7003     return;
7004
7005   while (charbuf < charbuf_end)
7006     {
7007       if (*charbuf >= 0)
7008         pos++, charbuf++;
7009       else
7010         {
7011           int len = -*charbuf;
7012
               /* An annotation needs more than two elements before
                  charbuf[1] is used as its kind.  */
7013           if (len > 2)
7014             switch (charbuf[1])
7015               {
7016               case CODING_ANNOTATE_COMPOSITION_MASK:
7017                 produce_composition (coding, charbuf, pos);
7018                 break;
7019               case CODING_ANNOTATE_CHARSET_MASK:
7020                 produce_charset (coding, charbuf, pos);
7021                 break;
7022               }
               /* Skip the whole annotation; POS does not move.  */
7023           charbuf += len;
7024         }
7025     }
7026 }
7027
7028 /* Decode the data at CODING->src_object into CODING->dst_object.
7029    CODING->src_object is a buffer, a string, or nil.
7030    CODING->dst_object is a buffer.
7031
7032    If CODING->src_object is a buffer, it must be the current buffer.
7033    In this case, if CODING->src_pos is positive, it is a position of
7034    the source text in the buffer, otherwise, the source text is in the
7035    gap area of the buffer, and CODING->src_pos specifies the offset of
7036    the text from GPT (which must be the same as PT).  If this is the
7037    same buffer as CODING->dst_object, CODING->src_pos must be
7038    negative.
7039
7040    If CODING->src_object is a string, CODING->src_pos is an index to
7041    that string.
7042
7043    If CODING->src_object is nil, CODING->source must already point to
7044    the non-relocatable memory area.  In this case, CODING->src_pos is
7045    an offset from CODING->source.
7046
7047    The decoded data is inserted at the current point of the buffer
7048    CODING->dst_object.
7049 */
7050
7051 static int
7052 decode_coding (struct coding_system *coding)
7053 {
7054   Lisp_Object attrs;
7055   Lisp_Object undo_list;
7056   Lisp_Object translation_table;
7057   struct ccl_spec cclspec;
7058   int carryover;
7059   int i;
7060
7061   if (BUFFERP (coding->src_object)
7062       && coding->src_pos > 0
7063       && coding->src_pos < GPT
7064       && coding->src_pos + coding->src_chars > GPT)
7065     move_gap_both (coding->src_pos, coding->src_pos_byte);
7066
7067   undo_list = Qt;
7068   if (BUFFERP (coding->dst_object))
7069     {
7070       if (current_buffer != XBUFFER (coding->dst_object))
7071         set_buffer_internal (XBUFFER (coding->dst_object));
7072       if (GPT != PT)
7073         move_gap_both (PT, PT_BYTE);
7074       undo_list = current_buffer->undo_list;
7075       current_buffer->undo_list = Qt;
7076     }
7077
7078   coding->consumed = coding->consumed_char = 0;
7079   coding->produced = coding->produced_char = 0;
7080   coding->chars_at_source = 0;
7081   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7082   coding->errors = 0;
7083
7084   ALLOC_CONVERSION_WORK_AREA (coding);
7085
7086   attrs = CODING_ID_ATTRS (coding->id);
7087   translation_table = get_translation_table (attrs, 0, NULL);
7088
7089   carryover = 0;
7090   if (coding->decoder == decode_coding_ccl)
7091     {
7092       coding->spec.ccl = &cclspec;
7093       setup_ccl_program (&cclspec.ccl, CODING_CCL_DECODER (coding));
7094     }
7095   do
7096     {
7097       EMACS_INT pos = coding->dst_pos + coding->produced_char;
7098
7099       coding_set_source (coding);
7100       coding->annotated = 0;
7101       coding->charbuf_used = carryover;
7102       (*(coding->decoder)) (coding);
7103       coding_set_destination (coding);
7104       carryover = produce_chars (coding, translation_table, 0);
7105       if (coding->annotated)
7106         produce_annotation (coding, pos);
7107       for (i = 0; i < carryover; i++)
7108         coding->charbuf[i]
7109           = coding->charbuf[coding->charbuf_used - carryover + i];
7110     }
7111   while (coding->result == CODING_RESULT_INSUFFICIENT_DST
7112          || (coding->consumed < coding->src_bytes
7113              && (coding->result == CODING_RESULT_SUCCESS
7114                  || coding->result == CODING_RESULT_INVALID_SRC)));
7115
7116   if (carryover > 0)
7117     {
7118       coding_set_destination (coding);
7119       coding->charbuf_used = carryover;
7120       produce_chars (coding, translation_table, 1);
7121     }
7122
7123   coding->carryover_bytes = 0;
7124   if (coding->consumed < coding->src_bytes)
7125     {
7126       int nbytes = coding->src_bytes - coding->consumed;
7127       const unsigned char *src;
7128
7129       coding_set_source (coding);
7130       coding_set_destination (coding);
7131       src = coding->source + coding->consumed;
7132
7133       if (coding->mode & CODING_MODE_LAST_BLOCK)
7134         {
7135           /* Flush out unprocessed data as binary chars.  We are sure
7136              that the number of data is less than the size of
7137              coding->charbuf.  */
7138           coding->charbuf_used = 0;
7139           coding->chars_at_source = 0;
7140
7141           while (nbytes-- > 0)
7142             {
7143               int c = *src++;
7144
7145               if (c & 0x80)
7146                 c = BYTE8_TO_CHAR (c);
7147               coding->charbuf[coding->charbuf_used++] = c;
7148             }
7149           produce_chars (coding, Qnil, 1);
7150         }
7151       else
7152         {
7153           /* Record unprocessed bytes in coding->carryover.  We are
7154              sure that the number of data is less than the size of
7155              coding->carryover.  */
7156           unsigned char *p = coding->carryover;
7157
7158           if (nbytes > sizeof coding->carryover)
7159             nbytes = sizeof coding->carryover;
7160           coding->carryover_bytes = nbytes;
7161           while (nbytes-- > 0)
7162             *p++ = *src++;
7163         }
7164       coding->consumed = coding->src_bytes;
7165     }
7166
7167   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)
7168       && !inhibit_eol_conversion)
7169     decode_eol (coding);
7170   if (BUFFERP (coding->dst_object))
7171     {
7172       current_buffer->undo_list = undo_list;
7173       record_insert (coding->dst_pos, coding->produced_char);
7174     }
7175   return coding->result;
7176 }
7177
7178
7179 /* Extract an annotation datum from a composition starting at POS and
7180    ending before LIMIT of CODING->src_object (buffer or string), store
7181    the data in BUF, set *STOP to a starting position of the next
7182    composition (if any) or to LIMIT, and return the address of the
7183    next element of BUF.
7184
7185    If such an annotation is not found, set *STOP to a starting
7186    position of a composition after POS (if any) or to LIMIT, and
7187    return BUF.  */
7188
7189 static INLINE int *
7190 handle_composition_annotation (EMACS_INT pos, EMACS_INT limit,
7191                                struct coding_system *coding, int *buf,
7192                                EMACS_INT *stop)
7193 {
7194   EMACS_INT start, end;
7195   Lisp_Object prop;
7196
7197   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
7198       || end > limit)
7199     *stop = limit;
7200   else if (start > pos)
7201     *stop = start;
7202   else
7203     {
7204       if (start == pos)
7205         {
7206           /* We found a composition.  Store the corresponding
7207              annotation data in BUF.  */
7208           int *head = buf;
7209           enum composition_method method = COMPOSITION_METHOD (prop);
7210           int nchars = COMPOSITION_LENGTH (prop);
7211
7212           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
7213           if (method != COMPOSITION_RELATIVE)
7214             {
7215               Lisp_Object components;
7216               int len, i, i_byte;
7217
7218               components = COMPOSITION_COMPONENTS (prop);
7219               if (VECTORP (components))
7220                 {
7221                   len = XVECTOR (components)->size;
7222                   for (i = 0; i < len; i++)
7223                     *buf++ = XINT (AREF (components, i));
7224                 }
7225               else if (STRINGP (components))
7226                 {
7227                   len = SCHARS (components);
7228                   i = i_byte = 0;
7229                   while (i < len)
7230                     {
7231                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
7232                       buf++;
7233                     }
7234                 }
7235               else if (INTEGERP (components))
7236                 {
7237                   len = 1;
7238                   *buf++ = XINT (components);
7239                 }
7240               else if (CONSP (components))
7241                 {
7242                   for (len = 0; CONSP (components);
7243                        len++, components = XCDR (components))
7244                     *buf++ = XINT (XCAR (components));
7245                 }
7246               else
7247                 abort ();
7248               *head -= len;
7249             }
7250         }
7251
7252       if (find_composition (end, limit, &start, &end, &prop,
7253                             coding->src_object)
7254           && end <= limit)
7255         *stop = start;
7256       else
7257         *stop = limit;
7258     }
7259   return buf;
7260 }
7261
7262
7263 /* Extract an annotation datum from a text property `charset' at POS of
7264    CODING->src_object (buffer of string), store the data in BUF, set
7265    *STOP to the position where the value of `charset' property changes
7266    (limiting by LIMIT), and return the address of the next element of
7267    BUF.
7268
7269    If the property value is nil, set *STOP to the position where the
7270    property value is non-nil (limiting by LIMIT), and return BUF.  */
7271
7272 static INLINE int *
7273 handle_charset_annotation (EMACS_INT pos, EMACS_INT limit,
7274                            struct coding_system *coding, int *buf,
7275                            EMACS_INT *stop)
7276 {
7277   Lisp_Object val, next;
7278   int id;
7279
7280   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
7281   if (! NILP (val) && CHARSETP (val))
7282     id = XINT (CHARSET_SYMBOL_ID (val));
7283   else
7284     id = -1;
7285   ADD_CHARSET_DATA (buf, 0, id);
7286   next = Fnext_single_property_change (make_number (pos), Qcharset,
7287                                        coding->src_object,
7288                                        make_number (limit));
7289   *stop = XINT (next);
7290   return buf;
7291 }
7292
7293
7294 static void
7295 consume_chars (struct coding_system *coding, Lisp_Object translation_table,
7296                int max_lookup)
7297 {
7298   int *buf = coding->charbuf;
7299   int *buf_end = coding->charbuf + coding->charbuf_size;
7300   const unsigned char *src = coding->source + coding->consumed;
7301   const unsigned char *src_end = coding->source + coding->src_bytes;
7302   EMACS_INT pos = coding->src_pos + coding->consumed_char;
7303   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
7304   int multibytep = coding->src_multibyte;
7305   Lisp_Object eol_type;
7306   int c;
7307   EMACS_INT stop, stop_composition, stop_charset;
7308   int *lookup_buf = NULL;
7309
7310   if (! NILP (translation_table))
7311     lookup_buf = alloca (sizeof (int) * max_lookup);
7312
7313   eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
7314   if (VECTORP (eol_type))
7315     eol_type = Qunix;
7316
7317   /* Note: composition handling is not yet implemented.  */
7318   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7319
7320   if (NILP (coding->src_object))
7321     stop = stop_composition = stop_charset = end_pos;
7322   else
7323     {
7324       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
7325         stop = stop_composition = pos;
7326       else
7327         stop = stop_composition = end_pos;
7328       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
7329         stop = stop_charset = pos;
7330       else
7331         stop_charset = end_pos;
7332     }
7333
7334   /* Compensate for CRLF and conversion.  */
7335   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
7336   while (buf < buf_end)
7337     {
7338       Lisp_Object trans;
7339
7340       if (pos == stop)
7341         {
7342           if (pos == end_pos)
7343             break;
7344           if (pos == stop_composition)
7345             buf = handle_composition_annotation (pos, end_pos, coding,
7346                                                  buf, &stop_composition);
7347           if (pos == stop_charset)
7348             buf = handle_charset_annotation (pos, end_pos, coding,
7349                                              buf, &stop_charset);
7350           stop = (stop_composition < stop_charset
7351                   ? stop_composition : stop_charset);
7352         }
7353
7354       if (! multibytep)
7355         {
7356           EMACS_INT bytes;
7357
7358           if (coding->encoder == encode_coding_raw_text
7359               || coding->encoder == encode_coding_ccl)
7360             c = *src++, pos++;
7361           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
7362             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
7363           else
7364             c = BYTE8_TO_CHAR (*src), src++, pos++;
7365         }
7366       else
7367         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
7368       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
7369         c = '\n';
7370       if (! EQ (eol_type, Qunix))
7371         {
7372           if (c == '\n')
7373             {
7374               if (EQ (eol_type, Qdos))
7375                 *buf++ = '\r';
7376               else
7377                 c = '\r';
7378             }
7379         }
7380
7381       trans = Qnil;
7382       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
7383       if (NILP (trans))
7384         *buf++ = c;
7385       else
7386         {
7387           int from_nchars = 1, to_nchars = 1;
7388           int *lookup_buf_end;
7389           const unsigned char *p = src;
7390           int i;
7391
7392           lookup_buf[0] = c;
7393           for (i = 1; i < max_lookup && p < src_end; i++)
7394             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7395           lookup_buf_end = lookup_buf + i;
7396           trans = get_translation (trans, lookup_buf, lookup_buf_end);
7397           if (INTEGERP (trans))
7398             c = XINT (trans);
7399           else if (CONSP (trans))
7400             {
7401               from_nchars = ASIZE (XCAR (trans));
7402               trans = XCDR (trans);
7403               if (INTEGERP (trans))
7404                 c = XINT (trans);
7405               else
7406                 {
7407                   to_nchars = ASIZE (trans);
7408                   if (buf + to_nchars > buf_end)
7409                     break;
7410                   c = XINT (AREF (trans, 0));
7411                 }
7412             }
7413           else
7414             break;
7415           *buf++ = c;
7416           for (i = 1; i < to_nchars; i++)
7417             *buf++ = XINT (AREF (trans, i));
7418           for (i = 1; i < from_nchars; i++, pos++)
7419             src += MULTIBYTE_LENGTH_NO_CHECK (src);
7420         }
7421     }
7422
7423   coding->consumed = src - coding->source;
7424   coding->consumed_char = pos - coding->src_pos;
7425   coding->charbuf_used = buf - coding->charbuf;
7426   coding->chars_at_source = 0;
7427 }
7428
7429
7430 /* Encode the text at CODING->src_object into CODING->dst_object.
7431    CODING->src_object is a buffer or a string.
7432    CODING->dst_object is a buffer or nil.
7433
7434    If CODING->src_object is a buffer, it must be the current buffer.
7435    In this case, if CODING->src_pos is positive, it is a position of
7436    the source text in the buffer, otherwise. the source text is in the
7437    gap area of the buffer, and coding->src_pos specifies the offset of
7438    the text from GPT (which must be the same as PT).  If this is the
7439    same buffer as CODING->dst_object, CODING->src_pos must be
7440    negative and CODING should not have `pre-write-conversion'.
7441
7442    If CODING->src_object is a string, CODING should not have
7443    `pre-write-conversion'.
7444
7445    If CODING->dst_object is a buffer, the encoded data is inserted at
7446    the current point of that buffer.
7447
7448    If CODING->dst_object is nil, the encoded data is placed at the
7449    memory area specified by CODING->destination.  */
7450
7451 static int
7452 encode_coding (struct coding_system *coding)
7453 {
7454   Lisp_Object attrs;
7455   Lisp_Object translation_table;
7456   int max_lookup;
7457   struct ccl_spec cclspec;
7458
7459   attrs = CODING_ID_ATTRS (coding->id);
7460   if (coding->encoder == encode_coding_raw_text)
7461     translation_table = Qnil, max_lookup = 0;
7462   else
7463     translation_table = get_translation_table (attrs, 1, &max_lookup);
7464
7465   if (BUFFERP (coding->dst_object))
7466     {
7467       set_buffer_internal (XBUFFER (coding->dst_object));
7468       coding->dst_multibyte
7469         = ! NILP (current_buffer->enable_multibyte_characters);
7470     }
7471
7472   coding->consumed = coding->consumed_char = 0;
7473   coding->produced = coding->produced_char = 0;
7474   record_conversion_result (coding, CODING_RESULT_SUCCESS);
7475   coding->errors = 0;
7476
7477   ALLOC_CONVERSION_WORK_AREA (coding);
7478
7479   if (coding->encoder == encode_coding_ccl)
7480     {
7481       coding->spec.ccl = &cclspec;
7482       setup_ccl_program (&cclspec.ccl, CODING_CCL_ENCODER (coding));
7483     }
7484   do {
7485     coding_set_source (coding);
7486     consume_chars (coding, translation_table, max_lookup);
7487     coding_set_destination (coding);
7488     (*(coding->encoder)) (coding);
7489   } while (coding->consumed_char < coding->src_chars);
7490
7491   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
7492     insert_from_gap (coding->produced_char, coding->produced);
7493
7494   return (coding->result);
7495 }
7496
7497
7498 /* Name (or base name) of work buffer for code conversion.  */
7499 static Lisp_Object Vcode_conversion_workbuf_name;
7500
7501 /* A working buffer used by the top level conversion.  Once it is
7502    created, it is never destroyed.  It has the name
7503    Vcode_conversion_workbuf_name.  The other working buffers are
7504    destroyed after the use is finished, and their names are modified
7505    versions of Vcode_conversion_workbuf_name.  */
7506 static Lisp_Object Vcode_conversion_reused_workbuf;
7507
7508 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
7509 static int reused_workbuf_in_use;
7510
7511
7512 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
7513    multibyteness of returning buffer.  */
7514
7515 static Lisp_Object
7516 make_conversion_work_buffer (int multibyte)
7517 {
7518   Lisp_Object name, workbuf;
7519   struct buffer *current;
7520
7521   if (reused_workbuf_in_use++)
7522     {
7523       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
7524       workbuf = Fget_buffer_create (name);
7525     }
7526   else
7527     {
7528       if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
7529         Vcode_conversion_reused_workbuf
7530           = Fget_buffer_create (Vcode_conversion_workbuf_name);
7531       workbuf = Vcode_conversion_reused_workbuf;
7532     }
7533   current = current_buffer;
7534   set_buffer_internal (XBUFFER (workbuf));
7535   /* We can't allow modification hooks to run in the work buffer.  For
7536      instance, directory_files_internal assumes that file decoding
7537      doesn't compile new regexps.  */
7538   Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
7539   Ferase_buffer ();
7540   current_buffer->undo_list = Qt;
7541   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
7542   set_buffer_internal (current);
7543   return workbuf;
7544 }
7545
7546
7547 static Lisp_Object
7548 code_conversion_restore (Lisp_Object arg)
7549 {
7550   Lisp_Object current, workbuf;
7551   struct gcpro gcpro1;
7552
7553   GCPRO1 (arg);
7554   current = XCAR (arg);
7555   workbuf = XCDR (arg);
7556   if (! NILP (workbuf))
7557     {
7558       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
7559         reused_workbuf_in_use = 0;
7560       else if (! NILP (Fbuffer_live_p (workbuf)))
7561         Fkill_buffer (workbuf);
7562     }
7563   set_buffer_internal (XBUFFER (current));
7564   UNGCPRO;
7565   return Qnil;
7566 }
7567
7568 Lisp_Object
7569 code_conversion_save (int with_work_buf, int multibyte)
7570 {
7571   Lisp_Object workbuf = Qnil;
7572
7573   if (with_work_buf)
7574     workbuf = make_conversion_work_buffer (multibyte);
7575   record_unwind_protect (code_conversion_restore,
7576                          Fcons (Fcurrent_buffer (), workbuf));
7577   return workbuf;
7578 }
7579
7580 int
7581 decode_coding_gap (struct coding_system *coding,
7582                    EMACS_INT chars, EMACS_INT bytes)
7583 {
7584   int count = SPECPDL_INDEX ();
7585   Lisp_Object attrs;
7586
7587   code_conversion_save (0, 0);
7588
7589   coding->src_object = Fcurrent_buffer ();
7590   coding->src_chars = chars;
7591   coding->src_bytes = bytes;
7592   coding->src_pos = -chars;
7593   coding->src_pos_byte = -bytes;
7594   coding->src_multibyte = chars < bytes;
7595   coding->dst_object = coding->src_object;
7596   coding->dst_pos = PT;
7597   coding->dst_pos_byte = PT_BYTE;
7598   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
7599
7600   if (CODING_REQUIRE_DETECTION (coding))
7601     detect_coding (coding);
7602
7603   coding->mode |= CODING_MODE_LAST_BLOCK;
7604   current_buffer->text->inhibit_shrinking = 1;
7605   decode_coding (coding);
7606   current_buffer->text->inhibit_shrinking = 0;
7607
7608   attrs = CODING_ID_ATTRS (coding->id);
7609   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7610     {
7611       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7612       Lisp_Object val;
7613
7614       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7615       val = call1 (CODING_ATTR_POST_READ (attrs),
7616                    make_number (coding->produced_char));
7617       CHECK_NATNUM (val);
7618       coding->produced_char += Z - prev_Z;
7619       coding->produced += Z_BYTE - prev_Z_BYTE;
7620     }
7621
7622   unbind_to (count, Qnil);
7623   return coding->result;
7624 }
7625
7626 int
7627 encode_coding_gap (struct coding_system *coding,
7628                    EMACS_INT chars, EMACS_INT bytes)
7629 {
7630   int count = SPECPDL_INDEX ();
7631
7632   code_conversion_save (0, 0);
7633
7634   coding->src_object = Fcurrent_buffer ();
7635   coding->src_chars = chars;
7636   coding->src_bytes = bytes;
7637   coding->src_pos = -chars;
7638   coding->src_pos_byte = -bytes;
7639   coding->src_multibyte = chars < bytes;
7640   coding->dst_object = coding->src_object;
7641   coding->dst_pos = PT;
7642   coding->dst_pos_byte = PT_BYTE;
7643
7644   encode_coding (coding);
7645
7646   unbind_to (count, Qnil);
7647   return coding->result;
7648 }
7649
7650
7651 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
7652    SRC_OBJECT into DST_OBJECT by coding context CODING.
7653
7654    SRC_OBJECT is a buffer, a string, or Qnil.
7655
7656    If it is a buffer, the text is at point of the buffer.  FROM and TO
7657    are positions in the buffer.
7658
7659    If it is a string, the text is at the beginning of the string.
7660    FROM and TO are indices to the string.
7661
7662    If it is nil, the text is at coding->source.  FROM and TO are
7663    indices to coding->source.
7664
7665    DST_OBJECT is a buffer, Qt, or Qnil.
7666
7667    If it is a buffer, the decoded text is inserted at point of the
7668    buffer.  If the buffer is the same as SRC_OBJECT, the source text
7669    is deleted.
7670
7671    If it is Qt, a string is made from the decoded text, and
7672    set in CODING->dst_object.
7673
7674    If it is Qnil, the decoded text is stored at CODING->destination.
7675    The caller must allocate CODING->dst_bytes bytes at
7676    CODING->destination by xmalloc.  If the decoded text is longer than
7677    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
7678  */
7679
7680 void
7681 decode_coding_object (struct coding_system *coding,
7682                       Lisp_Object src_object,
7683                       EMACS_INT from, EMACS_INT from_byte,
7684                       EMACS_INT to, EMACS_INT to_byte,
7685                       Lisp_Object dst_object)
7686 {
7687   int count = SPECPDL_INDEX ();
7688   unsigned char *destination;
7689   EMACS_INT dst_bytes;
7690   EMACS_INT chars = to - from;
7691   EMACS_INT bytes = to_byte - from_byte;
7692   Lisp_Object attrs;
7693   int saved_pt = -1, saved_pt_byte;
7694   int need_marker_adjustment = 0;
7695   Lisp_Object old_deactivate_mark;
7696
7697   old_deactivate_mark = Vdeactivate_mark;
7698
7699   if (NILP (dst_object))
7700     {
7701       destination = coding->destination;
7702       dst_bytes = coding->dst_bytes;
7703     }
7704
7705   coding->src_object = src_object;
7706   coding->src_chars = chars;
7707   coding->src_bytes = bytes;
7708   coding->src_multibyte = chars < bytes;
7709
7710   if (STRINGP (src_object))
7711     {
7712       coding->src_pos = from;
7713       coding->src_pos_byte = from_byte;
7714     }
7715   else if (BUFFERP (src_object))
7716     {
7717       set_buffer_internal (XBUFFER (src_object));
7718       if (from != GPT)
7719         move_gap_both (from, from_byte);
7720       if (EQ (src_object, dst_object))
7721         {
7722           struct Lisp_Marker *tail;
7723
7724           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7725             {
7726               tail->need_adjustment
7727                 = tail->charpos == (tail->insertion_type ? from : to);
7728               need_marker_adjustment |= tail->need_adjustment;
7729             }
7730           saved_pt = PT, saved_pt_byte = PT_BYTE;
7731           TEMP_SET_PT_BOTH (from, from_byte);
7732           current_buffer->text->inhibit_shrinking = 1;
7733           del_range_both (from, from_byte, to, to_byte, 1);
7734           coding->src_pos = -chars;
7735           coding->src_pos_byte = -bytes;
7736         }
7737       else
7738         {
7739           coding->src_pos = from;
7740           coding->src_pos_byte = from_byte;
7741         }
7742     }
7743
7744   if (CODING_REQUIRE_DETECTION (coding))
7745     detect_coding (coding);
7746   attrs = CODING_ID_ATTRS (coding->id);
7747
7748   if (EQ (dst_object, Qt)
7749       || (! NILP (CODING_ATTR_POST_READ (attrs))
7750           && NILP (dst_object)))
7751     {
7752       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7753       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7754       coding->dst_pos = BEG;
7755       coding->dst_pos_byte = BEG_BYTE;
7756     }
7757   else if (BUFFERP (dst_object))
7758     {
7759       code_conversion_save (0, 0);
7760       coding->dst_object = dst_object;
7761       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7762       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7763       coding->dst_multibyte
7764         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7765     }
7766   else
7767     {
7768       code_conversion_save (0, 0);
7769       coding->dst_object = Qnil;
7770       /* Most callers presume this will return a multibyte result, and they
7771          won't use `binary' or `raw-text' anyway, so let's not worry about
7772          CODING_FOR_UNIBYTE.  */
7773       coding->dst_multibyte = 1;
7774     }
7775
7776   decode_coding (coding);
7777
7778   if (BUFFERP (coding->dst_object))
7779     set_buffer_internal (XBUFFER (coding->dst_object));
7780
7781   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7782     {
7783       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7784       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7785       Lisp_Object val;
7786
7787       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7788       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7789               old_deactivate_mark);
7790       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7791                         make_number (coding->produced_char));
7792       UNGCPRO;
7793       CHECK_NATNUM (val);
7794       coding->produced_char += Z - prev_Z;
7795       coding->produced += Z_BYTE - prev_Z_BYTE;
7796     }
7797
7798   if (EQ (dst_object, Qt))
7799     {
7800       coding->dst_object = Fbuffer_string ();
7801     }
7802   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7803     {
7804       set_buffer_internal (XBUFFER (coding->dst_object));
7805       if (dst_bytes < coding->produced)
7806         {
7807           destination = xrealloc (destination, coding->produced);
7808           if (! destination)
7809             {
7810               record_conversion_result (coding,
7811                                         CODING_RESULT_INSUFFICIENT_MEM);
7812               unbind_to (count, Qnil);
7813               return;
7814             }
7815           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7816             move_gap_both (BEGV, BEGV_BYTE);
7817           memcpy (destination, BEGV_ADDR, coding->produced);
7818           coding->destination = destination;
7819         }
7820     }
7821
7822   if (saved_pt >= 0)
7823     {
7824       /* This is the case of:
7825          (BUFFERP (src_object) && EQ (src_object, dst_object))
7826          As we have moved PT while replacing the original buffer
7827          contents, we must recover it now.  */
7828       set_buffer_internal (XBUFFER (src_object));
7829       current_buffer->text->inhibit_shrinking = 0;
7830       if (saved_pt < from)
7831         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7832       else if (saved_pt < from + chars)
7833         TEMP_SET_PT_BOTH (from, from_byte);
7834       else if (! NILP (current_buffer->enable_multibyte_characters))
7835         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7836                           saved_pt_byte + (coding->produced - bytes));
7837       else
7838         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7839                           saved_pt_byte + (coding->produced - bytes));
7840
7841       if (need_marker_adjustment)
7842         {
7843           struct Lisp_Marker *tail;
7844
7845           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7846             if (tail->need_adjustment)
7847               {
7848                 tail->need_adjustment = 0;
7849                 if (tail->insertion_type)
7850                   {
7851                     tail->bytepos = from_byte;
7852                     tail->charpos = from;
7853                   }
7854                 else
7855                   {
7856                     tail->bytepos = from_byte + coding->produced;
7857                     tail->charpos
7858                       = (NILP (current_buffer->enable_multibyte_characters)
7859                          ? tail->bytepos : from + coding->produced_char);
7860                   }
7861               }
7862         }
7863     }
7864
7865   Vdeactivate_mark = old_deactivate_mark;
7866   unbind_to (count, coding->dst_object);
7867 }
7868
7869
7870 void
7871 encode_coding_object (struct coding_system *coding,
7872                       Lisp_Object src_object,
7873                       EMACS_INT from, EMACS_INT from_byte,
7874                       EMACS_INT to, EMACS_INT to_byte,
7875                       Lisp_Object dst_object)
7876 {
7877   int count = SPECPDL_INDEX ();
7878   EMACS_INT chars = to - from;
7879   EMACS_INT bytes = to_byte - from_byte;
7880   Lisp_Object attrs;
7881   int saved_pt = -1, saved_pt_byte;
7882   int need_marker_adjustment = 0;
7883   int kill_src_buffer = 0;
7884   Lisp_Object old_deactivate_mark;
7885
7886   old_deactivate_mark = Vdeactivate_mark;
7887
7888   coding->src_object = src_object;
7889   coding->src_chars = chars;
7890   coding->src_bytes = bytes;
7891   coding->src_multibyte = chars < bytes;
7892
7893   attrs = CODING_ID_ATTRS (coding->id);
7894
7895   if (EQ (src_object, dst_object))
7896     {
7897       struct Lisp_Marker *tail;
7898
7899       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7900         {
7901           tail->need_adjustment
7902             = tail->charpos == (tail->insertion_type ? from : to);
7903           need_marker_adjustment |= tail->need_adjustment;
7904         }
7905     }
7906
7907   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7908     {
7909       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7910       set_buffer_internal (XBUFFER (coding->src_object));
7911       if (STRINGP (src_object))
7912         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7913       else if (BUFFERP (src_object))
7914         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7915       else
7916         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7917
7918       if (EQ (src_object, dst_object))
7919         {
7920           set_buffer_internal (XBUFFER (src_object));
7921           saved_pt = PT, saved_pt_byte = PT_BYTE;
7922           del_range_both (from, from_byte, to, to_byte, 1);
7923           set_buffer_internal (XBUFFER (coding->src_object));
7924         }
7925
7926       {
7927         Lisp_Object args[3];
7928         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7929
7930         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7931                 old_deactivate_mark);
7932         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7933         args[1] = make_number (BEG);
7934         args[2] = make_number (Z);
7935         safe_call (3, args);
7936         UNGCPRO;
7937       }
7938       if (XBUFFER (coding->src_object) != current_buffer)
7939         kill_src_buffer = 1;
7940       coding->src_object = Fcurrent_buffer ();
7941       if (BEG != GPT)
7942         move_gap_both (BEG, BEG_BYTE);
7943       coding->src_chars = Z - BEG;
7944       coding->src_bytes = Z_BYTE - BEG_BYTE;
7945       coding->src_pos = BEG;
7946       coding->src_pos_byte = BEG_BYTE;
7947       coding->src_multibyte = Z < Z_BYTE;
7948     }
7949   else if (STRINGP (src_object))
7950     {
7951       code_conversion_save (0, 0);
7952       coding->src_pos = from;
7953       coding->src_pos_byte = from_byte;
7954     }
7955   else if (BUFFERP (src_object))
7956     {
7957       code_conversion_save (0, 0);
7958       set_buffer_internal (XBUFFER (src_object));
7959       if (EQ (src_object, dst_object))
7960         {
7961           saved_pt = PT, saved_pt_byte = PT_BYTE;
7962           coding->src_object = del_range_1 (from, to, 1, 1);
7963           coding->src_pos = 0;
7964           coding->src_pos_byte = 0;
7965         }
7966       else
7967         {
7968           if (from < GPT && to >= GPT)
7969             move_gap_both (from, from_byte);
7970           coding->src_pos = from;
7971           coding->src_pos_byte = from_byte;
7972         }
7973     }
7974   else
7975     code_conversion_save (0, 0);
7976
7977   if (BUFFERP (dst_object))
7978     {
7979       coding->dst_object = dst_object;
7980       if (EQ (src_object, dst_object))
7981         {
7982           coding->dst_pos = from;
7983           coding->dst_pos_byte = from_byte;
7984         }
7985       else
7986         {
7987           struct buffer *current = current_buffer;
7988
7989           set_buffer_temp (XBUFFER (dst_object));
7990           coding->dst_pos = PT;
7991           coding->dst_pos_byte = PT_BYTE;
7992           move_gap_both (coding->dst_pos, coding->dst_pos_byte);
7993           set_buffer_temp (current);
7994         }
7995       coding->dst_multibyte
7996         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7997     }
7998   else if (EQ (dst_object, Qt))
7999     {
8000       coding->dst_object = Qnil;
8001       coding->dst_bytes = coding->src_chars;
8002       if (coding->dst_bytes == 0)
8003         coding->dst_bytes = 1;
8004       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
8005       coding->dst_multibyte = 0;
8006     }
8007   else
8008     {
8009       coding->dst_object = Qnil;
8010       coding->dst_multibyte = 0;
8011     }
8012
8013   encode_coding (coding);
8014
8015   if (EQ (dst_object, Qt))
8016     {
8017       if (BUFFERP (coding->dst_object))
8018         coding->dst_object = Fbuffer_string ();
8019       else
8020         {
8021           coding->dst_object
8022             = make_unibyte_string ((char *) coding->destination,
8023                                    coding->produced);
8024           xfree (coding->destination);
8025         }
8026     }
8027
8028   if (saved_pt >= 0)
8029     {
8030       /* This is the case of:
8031          (BUFFERP (src_object) && EQ (src_object, dst_object))
8032          As we have moved PT while replacing the original buffer
8033          contents, we must recover it now.  */
8034       set_buffer_internal (XBUFFER (src_object));
8035       if (saved_pt < from)
8036         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
8037       else if (saved_pt < from + chars)
8038         TEMP_SET_PT_BOTH (from, from_byte);
8039       else if (! NILP (current_buffer->enable_multibyte_characters))
8040         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
8041                           saved_pt_byte + (coding->produced - bytes));
8042       else
8043         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
8044                           saved_pt_byte + (coding->produced - bytes));
8045
8046       if (need_marker_adjustment)
8047         {
8048           struct Lisp_Marker *tail;
8049
8050           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
8051             if (tail->need_adjustment)
8052               {
8053                 tail->need_adjustment = 0;
8054                 if (tail->insertion_type)
8055                   {
8056                     tail->bytepos = from_byte;
8057                     tail->charpos = from;
8058                   }
8059                 else
8060                   {
8061                     tail->bytepos = from_byte + coding->produced;
8062                     tail->charpos
8063                       = (NILP (current_buffer->enable_multibyte_characters)
8064                          ? tail->bytepos : from + coding->produced_char);
8065                   }
8066               }
8067         }
8068     }
8069
8070   if (kill_src_buffer)
8071     Fkill_buffer (coding->src_object);
8072
8073   Vdeactivate_mark = old_deactivate_mark;
8074   unbind_to (count, Qnil);
8075 }
8076
8077
8078 Lisp_Object
8079 preferred_coding_system (void)
8080 {
8081   int id = coding_categories[coding_priorities[0]].id;
8082
8083   return CODING_ID_NAME (id);
8084 }
8085
8086 \f
8087 #ifdef emacs
/*** 11. Emacs Lisp library functions ***/
8089
8090 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
8091        doc: /* Return t if OBJECT is nil or a coding-system.
8092 See the documentation of `define-coding-system' for information
8093 about coding-system objects.  */)
8094   (Lisp_Object object)
8095 {
8096   if (NILP (object)
8097       || CODING_SYSTEM_ID (object) >= 0)
8098     return Qt;
8099   if (! SYMBOLP (object)
8100       || NILP (Fget (object, Qcoding_system_define_form)))
8101     return Qnil;
8102   return Qt;
8103 }
8104
8105 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
8106        Sread_non_nil_coding_system, 1, 1, 0,
8107        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
8108   (Lisp_Object prompt)
8109 {
8110   Lisp_Object val;
8111   do
8112     {
8113       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8114                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
8115     }
8116   while (SCHARS (val) == 0);
8117   return (Fintern (val, Qnil));
8118 }
8119
8120 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
8121        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
8122 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
8123 Ignores case when completing coding systems (all Emacs coding systems
8124 are lower-case).  */)
8125   (Lisp_Object prompt, Lisp_Object default_coding_system)
8126 {
8127   Lisp_Object val;
8128   int count = SPECPDL_INDEX ();
8129
8130   if (SYMBOLP (default_coding_system))
8131     default_coding_system = SYMBOL_NAME (default_coding_system);
8132   specbind (Qcompletion_ignore_case, Qt);
8133   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
8134                           Qt, Qnil, Qcoding_system_history,
8135                           default_coding_system, Qnil);
8136   unbind_to (count, Qnil);
8137   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
8138 }
8139
8140 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
8141        1, 1, 0,
8142        doc: /* Check validity of CODING-SYSTEM.
8143 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
8144 It is valid if it is nil or a symbol defined as a coding system by the
8145 function `define-coding-system'.  */)
8146   (Lisp_Object coding_system)
8147 {
8148   Lisp_Object define_form;
8149
8150   define_form = Fget (coding_system, Qcoding_system_define_form);
8151   if (! NILP (define_form))
8152     {
8153       Fput (coding_system, Qcoding_system_define_form, Qnil);
8154       safe_eval (define_form);
8155     }
8156   if (!NILP (Fcoding_system_p (coding_system)))
8157     return coding_system;
8158   xsignal1 (Qcoding_system_error, coding_system);
8159 }
8160
8161 \f
/* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
   HIGHEST is nonzero, return the coding system of the highest
   priority among the detected coding systems.  Otherwise return a
   list of detected coding systems sorted by their priorities.  If
   MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
   multibyte form but contain only ASCII and eight-bit chars.
   Otherwise, the bytes are raw bytes.

   CODING-SYSTEM controls the detection as below:

   If it is nil, detect both text-format and eol-format.  If the
   text-format part of CODING-SYSTEM is already specified
   (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
   part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
   detect only text-format.  */
8177
Lisp_Object
detect_coding_system (const unsigned char *src,
                      EMACS_INT src_chars, EMACS_INT src_bytes,
                      int highest, int multibytep,
                      Lisp_Object coding_system)
{
  const unsigned char *src_end = src + src_bytes;
  Lisp_Object attrs, eol_type;
  /* VAL collects coding system ids while detecting the text format;
     the eol pass at the end replaces each id by a coding system
     name.  */
  Lisp_Object val = Qnil;
  struct coding_system coding;
  int id;
  struct coding_detection_info detect_info;
  enum coding_category base_category;
  int null_byte_found = 0, eight_bit_found = 0;

  /* A nil CODING_SYSTEM is treated as `undecided'.  From here on
     CODING_SYSTEM holds the base name of the given coding system.  */
  if (NILP (coding_system))
    coding_system = Qundecided;
  setup_coding_system (coding_system, &coding);
  attrs = CODING_ID_ATTRS (coding.id);
  eol_type = CODING_ID_EOL_TYPE (coding.id);
  coding_system = CODING_ATTR_BASE_NAME (attrs);

  /* Point CODING at the whole input and flag it with
     CODING_MODE_LAST_BLOCK.  */
  coding.source = src;
  coding.src_chars = src_chars;
  coding.src_bytes = src_bytes;
  coding.src_multibyte = multibytep;
  coding.consumed = 0;
  coding.mode |= CODING_MODE_LAST_BLOCK;
  coding.head_ascii = 0;

  detect_info.checked = detect_info.found = detect_info.rejected = 0;

  /* At first, detect text-format if necessary.  */
  base_category = XINT (CODING_ATTR_CATEGORY (attrs));
  if (base_category == coding_category_undecided)
    {
      /* CATEGORY and THIS are set by the loops below and are read
         again afterwards when only the highest-priority result is
         requested.  */
      enum coding_category category;
      struct coding_system *this;
      int c, i;

      /* Skip all ASCII bytes except for a few ISO2022 controls.  */
      for (; src < src_end; src++)
        {
          c = *src;
          if (c & 0x80)
            {
              /* A byte with the high bit set.  Stop scanning once a
                 null byte has been seen as well.  */
              eight_bit_found = 1;
              if (null_byte_found)
                break;
            }
          else if (c < 0x20)
            {
              /* The ISO-2022 detector is tried at most once: it is
                 skipped as soon as detect_info.checked is nonzero.  */
              if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
                  && ! inhibit_iso_escape_detection
                  && ! detect_info.checked)
                {
                  if (detect_coding_iso_2022 (&coding, &detect_info))
                    {
                      /* We have scanned the whole data.  */
                      if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
                        {
                          /* We didn't find an 8-bit code.  We may
                             have found a null-byte, but it's very
                             rare that a binary file conform to
                             ISO-2022.  */
                          src = src_end;
                          coding.head_ascii = src - coding.source;
                        }
                      detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
                      break;
                    }
                }
              else if (! c && !inhibit_null_byte_detection)
                {
                  /* A null byte.  Stop scanning once an 8-bit byte
                     has been seen as well.  */
                  null_byte_found = 1;
                  if (eight_bit_found)
                    break;
                }
              /* head_ascii counts the 7-bit bytes passed before the
                 first 8-bit byte.  */
              if (! eight_bit_found)
                coding.head_ascii++;
            }
          else if (! eight_bit_found)
            coding.head_ascii++;
        }

      /* The per-category work below is needed only if the scan saw a
         null byte or an 8-bit byte, stopped before the end of the
         data, or the ISO-2022 detector reported a match.  */
      if (null_byte_found || eight_bit_found
          || coding.head_ascii < coding.src_bytes
          || detect_info.found)
        {
          if (coding.head_ascii == coding.src_bytes)
            /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
            for (i = 0; i < coding_category_raw_text; i++)
              {
                /* Stop at the first category, in priority order,
                   that is already marked as found.  */
                category = coding_priorities[i];
                this = coding_categories + category;
                if (detect_info.found & (1 << category))
                  break;
              }
          else
            {
              if (null_byte_found)
                {
                  /* Mark every category except the UTF-16 ones as
                     both checked and rejected.  */
                  detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                  detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                }
              /* Walk the categories in priority order, calling the
                 detector of each one that has not been checked
                 yet.  */
              for (i = 0; i < coding_category_raw_text; i++)
                {
                  category = coding_priorities[i];
                  this = coding_categories + category;

                  if (this->id < 0)
                    {
                      /* No coding system of this category is defined.  */
                      detect_info.rejected |= (1 << category);
                    }
                  else if (category >= coding_category_raw_text)
                    continue;
                  else if (detect_info.checked & (1 << category))
                    {
                      /* Already checked (possibly by the detector of
                         another category).  When only the best
                         match is wanted, a found category ends the
                         search.  */
                      if (highest
                          && (detect_info.found & (1 << category)))
                        break;
                    }
                  else if ((*(this->detector)) (&coding, &detect_info)
                           && highest
                           && (detect_info.found & (1 << category)))
                    {
                      /* For utf-16-auto, narrow CATEGORY to the
                         little- or big-endian category depending on
                         which one was found.  THIS is left
                         unchanged.  */
                      if (category == coding_category_utf_16_auto)
                        {
                          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
                            category = coding_category_utf_16_le;
                          else
                            category = coding_category_utf_16_be;
                        }
                      break;
                    }
                }
            }
        }

      /* Turn the detection masks into VAL, a list of coding system
         ids.  */
      if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY
          || null_byte_found)
        {
          /* Every category was rejected, or a null byte was seen:
             the answer is `no-conversion'.  */
          detect_info.found = CATEGORY_MASK_RAW_TEXT;
          id = CODING_SYSTEM_ID (Qno_conversion);
          val = Fcons (make_number (id), Qnil);
        }
      else if (! detect_info.rejected && ! detect_info.found)
        {
          /* Nothing was rejected and nothing was found: answer with
             the coding system of the `undecided' category.  */
          detect_info.found = CATEGORY_MASK_ANY;
          id = coding_categories[coding_category_undecided].id;
          val = Fcons (make_number (id), Qnil);
        }
      else if (highest)
        {
          if (detect_info.found)
            {
              /* Use CATEGORY and THIS as left by the loops above.  */
              detect_info.found = 1 << category;
              val = Fcons (make_number (this->id), Qnil);
            }
          else
            /* Nothing was positively found: take the first category,
               in priority order, that was not rejected.  */
            for (i = 0; i < coding_category_raw_text; i++)
              if (! (detect_info.rejected & (1 << coding_priorities[i])))
                {
                  detect_info.found = 1 << coding_priorities[i];
                  id = coding_categories[coding_priorities[i]].id;
                  val = Fcons (make_number (id), Qnil);
                  break;
                }
        }
      else
        {
          int mask = detect_info.rejected | detect_info.found;
          int found = 0;

          /* Both loops run from the lowest priority to the highest
             and push onto the front of VAL, so the final list has
             the found categories first, followed by the categories
             that were neither found nor rejected, each group in
             priority order.  */
          for (i = coding_category_raw_text - 1; i >= 0; i--)
            {
              category = coding_priorities[i];
              if (! (mask & (1 << category)))
                {
                  found |= 1 << category;
                  id = coding_categories[category].id;
                  if (id >= 0)
                    val = Fcons (make_number (id), val);
                }
            }
          for (i = coding_category_raw_text - 1; i >= 0; i--)
            {
              category = coding_priorities[i];
              if (detect_info.found & (1 << category))
                {
                  id = coding_categories[category].id;
                  val = Fcons (make_number (id), val);
                }
            }
          detect_info.found |= found;
        }
    }
  else if (base_category == coding_category_utf_8_auto)
    {
      /* Only decide between the with-signature and without-signature
         UTF-8 categories.  VAL stays nil if the detector fails.  */
      if (detect_coding_utf_8 (&coding, &detect_info))
        {
          struct coding_system *this;

          if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
            this = coding_categories + coding_category_utf_8_sig;
          else
            this = coding_categories + coding_category_utf_8_nosig;
          val = Fcons (make_number (this->id), Qnil);
        }
    }
  else if (base_category == coding_category_utf_16_auto)
    {
      /* Only decide which UTF-16 category applies.  VAL stays nil if
         the detector fails.  */
      if (detect_coding_utf_16 (&coding, &detect_info))
        {
          struct coding_system *this;

          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
            this = coding_categories + coding_category_utf_16_le;
          else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
            this = coding_categories + coding_category_utf_16_be;
          else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
            this = coding_categories + coding_category_utf_16_be_nosig;
          else
            this = coding_categories + coding_category_utf_16_le_nosig;
          val = Fcons (make_number (this->id), Qnil);
        }
    }
  else
    {
      /* Any other base category: no text-format detection is done;
         the given coding system itself is the single candidate.  */
      detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
      val = Fcons (make_number (coding.id), Qnil);
    }

  /* Then, detect eol-format if necessary.  */
  {
    /* One eol result for each family of categories; each stays -1
       unless it is set below.  */
    int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
    Lisp_Object tail;

    if (VECTORP (eol_type))
      {
        /* The eol type of the given coding system is a vector: scan
           the data, once for each family that has a found
           category.  */
        if (detect_info.found & ~CATEGORY_MASK_UTF_16)
          {
            /* When a null byte was found, use LF without scanning.  */
            if (null_byte_found)
              normal_eol = EOL_SEEN_LF;
            else
              normal_eol = detect_eol (coding.source, src_bytes,
                                       coding_category_raw_text);
          }
        if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
                                 | CATEGORY_MASK_UTF_16_BE_NOSIG))
          utf_16_be_eol = detect_eol (coding.source, src_bytes,
                                      coding_category_utf_16_be);
        if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
                                 | CATEGORY_MASK_UTF_16_LE_NOSIG))
          utf_16_le_eol = detect_eol (coding.source, src_bytes,
                                      coding_category_utf_16_le);
      }
    else
      {
        /* The eol type is fixed by the given coding system: `unix'
           means LF, `dos' means CRLF, anything else is taken as
           CR.  */
        if (EQ (eol_type, Qunix))
          normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
        else if (EQ (eol_type, Qdos))
          normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
        else
          normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
      }

    /* Replace each id in VAL by a coding system name.  If the
       candidate's own eol type is a vector, elements 0, 1 and 2 of
       that vector are used for LF, CRLF and CR respectively;
       otherwise, or when no eol format was determined, the name
       registered for the id is used.  */
    for (tail = val; CONSP (tail); tail = XCDR (tail))
      {
        enum coding_category category;
        int this_eol;

        id = XINT (XCAR (tail));
        attrs = CODING_ID_ATTRS (id);
        category = XINT (CODING_ATTR_CATEGORY (attrs));
        eol_type = CODING_ID_EOL_TYPE (id);
        if (VECTORP (eol_type))
          {
            if (category == coding_category_utf_16_be
                || category == coding_category_utf_16_be_nosig)
              this_eol = utf_16_be_eol;
            else if (category == coding_category_utf_16_le
                     || category == coding_category_utf_16_le_nosig)
              this_eol = utf_16_le_eol;
            else
              this_eol = normal_eol;

            if (this_eol == EOL_SEEN_LF)
              XSETCAR (tail, AREF (eol_type, 0));
            else if (this_eol == EOL_SEEN_CRLF)
              XSETCAR (tail, AREF (eol_type, 1));
            else if (this_eol == EOL_SEEN_CR)
              XSETCAR (tail, AREF (eol_type, 2));
            else
              XSETCAR (tail, CODING_ID_NAME (id));
          }
        else
          XSETCAR (tail, CODING_ID_NAME (id));
      }
  }

  /* With HIGHEST, return the first element alone (nil if VAL is
     empty); otherwise return the whole list.  */
  return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
}
8482
8483
8484 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
8485        2, 3, 0,
8486        doc: /* Detect coding system of the text in the region between START and END.
8487 Return a list of possible coding systems ordered by priority.
8488 The coding systems to try and their priorities follows what
8489 the function `coding-system-priority-list' (which see) returns.
8490
8491 If only ASCII characters are found (except for such ISO-2022 control
8492 characters as ESC), it returns a list of single element `undecided'
8493 or its subsidiary coding system according to a detected end-of-line
8494 format.
8495
8496 If optional argument HIGHEST is non-nil, return the coding system of
8497 highest priority.  */)
8498   (Lisp_Object start, Lisp_Object end, Lisp_Object highest)
8499 {
8500   int from, to;
8501   int from_byte, to_byte;
8502
8503   CHECK_NUMBER_COERCE_MARKER (start);
8504   CHECK_NUMBER_COERCE_MARKER (end);
8505
8506   validate_region (&start, &end);
8507   from = XINT (start), to = XINT (end);
8508   from_byte = CHAR_TO_BYTE (from);
8509   to_byte = CHAR_TO_BYTE (to);
8510
8511   if (from < GPT && to >= GPT)
8512     move_gap_both (to, to_byte);
8513
8514   return detect_coding_system (BYTE_POS_ADDR (from_byte),
8515                                to - from, to_byte - from_byte,
8516                                !NILP (highest),
8517                                !NILP (current_buffer
8518                                       ->enable_multibyte_characters),
8519                                Qnil);
8520 }
8521
8522 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
8523        1, 2, 0,
8524        doc: /* Detect coding system of the text in STRING.
8525 Return a list of possible coding systems ordered by priority.
8526 The coding systems to try and their priorities follows what
8527 the function `coding-system-priority-list' (which see) returns.
8528
8529 If only ASCII characters are found (except for such ISO-2022 control
8530 characters as ESC), it returns a list of single element `undecided'
8531 or its subsidiary coding system according to a detected end-of-line
8532 format.
8533
8534 If optional argument HIGHEST is non-nil, return the coding system of
8535 highest priority.  */)
8536   (Lisp_Object string, Lisp_Object highest)
8537 {
8538   CHECK_STRING (string);
8539
8540   return detect_coding_system (SDATA (string),
8541                                SCHARS (string), SBYTES (string),
8542                                !NILP (highest), STRING_MULTIBYTE (string),
8543                                Qnil);
8544 }
8545
8546
8547 static INLINE int
8548 char_encodable_p (int c, Lisp_Object attrs)
8549 {
8550   Lisp_Object tail;
8551   struct charset *charset;
8552   Lisp_Object translation_table;
8553
8554   translation_table = CODING_ATTR_TRANS_TBL (attrs);
8555   if (! NILP (translation_table))
8556     c = translate_char (translation_table, c);
8557   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
8558        CONSP (tail); tail = XCDR (tail))
8559     {
8560       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
8561       if (CHAR_CHARSET_P (c, charset))
8562         break;
8563     }
8564   return (! NILP (tail));
8565 }
8566
8567
8568 /* Return a list of coding systems that safely encode the text between
8569    START and END.  If EXCLUDE is non-nil, it is a list of coding
8570    systems not to check.  The returned list doesn't contain any such
8571    coding systems.  In any case, if the text contains only ASCII or is
8572    unibyte, return t.  */
8573
8574 DEFUN ("find-coding-systems-region-internal",
8575        Ffind_coding_systems_region_internal,
8576        Sfind_coding_systems_region_internal, 2, 3, 0,
8577        doc: /* Internal use only.  */)
8578   (Lisp_Object start, Lisp_Object end, Lisp_Object exclude)
8579 {
8580   Lisp_Object coding_attrs_list, safe_codings;
8581   EMACS_INT start_byte, end_byte;
8582   const unsigned char *p, *pbeg, *pend;
8583   int c;
8584   Lisp_Object tail, elt, work_table;
8585
8586   if (STRINGP (start))
8587     {
8588       if (!STRING_MULTIBYTE (start)
8589           || SCHARS (start) == SBYTES (start))
8590         return Qt;
8591       start_byte = 0;
8592       end_byte = SBYTES (start);
8593     }
8594   else
8595     {
8596       CHECK_NUMBER_COERCE_MARKER (start);
8597       CHECK_NUMBER_COERCE_MARKER (end);
8598       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8599         args_out_of_range (start, end);
8600       if (NILP (current_buffer->enable_multibyte_characters))
8601         return Qt;
8602       start_byte = CHAR_TO_BYTE (XINT (start));
8603       end_byte = CHAR_TO_BYTE (XINT (end));
8604       if (XINT (end) - XINT (start) == end_byte - start_byte)
8605         return Qt;
8606
8607       if (XINT (start) < GPT && XINT (end) > GPT)
8608         {
8609           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8610             move_gap_both (XINT (start), start_byte);
8611           else
8612             move_gap_both (XINT (end), end_byte);
8613         }
8614     }
8615
8616   coding_attrs_list = Qnil;
8617   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
8618     if (NILP (exclude)
8619         || NILP (Fmemq (XCAR (tail), exclude)))
8620       {
8621         Lisp_Object attrs;
8622
8623         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
8624         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
8625             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
8626           {
8627             ASET (attrs, coding_attr_trans_tbl,
8628                   get_translation_table (attrs, 1, NULL));
8629             coding_attrs_list = Fcons (attrs, coding_attrs_list);
8630           }
8631       }
8632
8633   if (STRINGP (start))
8634     p = pbeg = SDATA (start);
8635   else
8636     p = pbeg = BYTE_POS_ADDR (start_byte);
8637   pend = p + (end_byte - start_byte);
8638
8639   while (p < pend && ASCII_BYTE_P (*p)) p++;
8640   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8641
8642   work_table = Fmake_char_table (Qnil, Qnil);
8643   while (p < pend)
8644     {
8645       if (ASCII_BYTE_P (*p))
8646         p++;
8647       else
8648         {
8649           c = STRING_CHAR_ADVANCE (p);
8650           if (!NILP (char_table_ref (work_table, c)))
8651             /* This character was already checked.  Ignore it.  */
8652             continue;
8653
8654           charset_map_loaded = 0;
8655           for (tail = coding_attrs_list; CONSP (tail);)
8656             {
8657               elt = XCAR (tail);
8658               if (NILP (elt))
8659                 tail = XCDR (tail);
8660               else if (char_encodable_p (c, elt))
8661                 tail = XCDR (tail);
8662               else if (CONSP (XCDR (tail)))
8663                 {
8664                   XSETCAR (tail, XCAR (XCDR (tail)));
8665                   XSETCDR (tail, XCDR (XCDR (tail)));
8666                 }
8667               else
8668                 {
8669                   XSETCAR (tail, Qnil);
8670                   tail = XCDR (tail);
8671                 }
8672             }
8673           if (charset_map_loaded)
8674             {
8675               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8676
8677               if (STRINGP (start))
8678                 pbeg = SDATA (start);
8679               else
8680                 pbeg = BYTE_POS_ADDR (start_byte);
8681               p = pbeg + p_offset;
8682               pend = pbeg + pend_offset;
8683             }
8684           char_table_set (work_table, c, Qt);
8685         }
8686     }
8687
8688   safe_codings = list2 (Qraw_text, Qno_conversion);
8689   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
8690     if (! NILP (XCAR (tail)))
8691       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
8692
8693   return safe_codings;
8694 }
8695
8696
8697 DEFUN ("unencodable-char-position", Funencodable_char_position,
8698        Sunencodable_char_position, 3, 5, 0,
8699        doc: /*
8700 Return position of first un-encodable character in a region.
8701 START and END specify the region and CODING-SYSTEM specifies the
8702 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
8703
8704 If optional 4th argument COUNT is non-nil, it specifies at most how
8705 many un-encodable characters to search.  In this case, the value is a
8706 list of positions.
8707
8708 If optional 5th argument STRING is non-nil, it is a string to search
8709 for un-encodable characters.  In that case, START and END are indexes
8710 to the string.  */)
8711   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object count, Lisp_Object string)
8712 {
8713   int n;
8714   struct coding_system coding;
8715   Lisp_Object attrs, charset_list, translation_table;
8716   Lisp_Object positions;
8717   int from, to;
8718   const unsigned char *p, *stop, *pend;
8719   int ascii_compatible;
8720
8721   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
8722   attrs = CODING_ID_ATTRS (coding.id);
8723   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
8724     return Qnil;
8725   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
8726   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8727   translation_table = get_translation_table (attrs, 1, NULL);
8728
8729   if (NILP (string))
8730     {
8731       validate_region (&start, &end);
8732       from = XINT (start);
8733       to = XINT (end);
8734       if (NILP (current_buffer->enable_multibyte_characters)
8735           || (ascii_compatible
8736               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
8737         return Qnil;
8738       p = CHAR_POS_ADDR (from);
8739       pend = CHAR_POS_ADDR (to);
8740       if (from < GPT && to >= GPT)
8741         stop = GPT_ADDR;
8742       else
8743         stop = pend;
8744     }
8745   else
8746     {
8747       CHECK_STRING (string);
8748       CHECK_NATNUM (start);
8749       CHECK_NATNUM (end);
8750       from = XINT (start);
8751       to = XINT (end);
8752       if (from > to
8753           || to > SCHARS (string))
8754         args_out_of_range_3 (string, start, end);
8755       if (! STRING_MULTIBYTE (string))
8756         return Qnil;
8757       p = SDATA (string) + string_char_to_byte (string, from);
8758       stop = pend = SDATA (string) + string_char_to_byte (string, to);
8759       if (ascii_compatible && (to - from) == (pend - p))
8760         return Qnil;
8761     }
8762
8763   if (NILP (count))
8764     n = 1;
8765   else
8766     {
8767       CHECK_NATNUM (count);
8768       n = XINT (count);
8769     }
8770
8771   positions = Qnil;
8772   while (1)
8773     {
8774       int c;
8775
8776       if (ascii_compatible)
8777         while (p < stop && ASCII_BYTE_P (*p))
8778           p++, from++;
8779       if (p >= stop)
8780         {
8781           if (p >= pend)
8782             break;
8783           stop = pend;
8784           p = GAP_END_ADDR;
8785         }
8786
8787       c = STRING_CHAR_ADVANCE (p);
8788       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8789           && ! char_charset (translate_char (translation_table, c),
8790                              charset_list, NULL))
8791         {
8792           positions = Fcons (make_number (from), positions);
8793           n--;
8794           if (n == 0)
8795             break;
8796         }
8797
8798       from++;
8799     }
8800
8801   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8802 }
8803
8804
8805 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8806        Scheck_coding_systems_region, 3, 3, 0,
8807        doc: /* Check if the region is encodable by coding systems.
8808
8809 START and END are buffer positions specifying the region.
8810 CODING-SYSTEM-LIST is a list of coding systems to check.
8811
8812 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8813 CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
8814 whole region, POS0, POS1, ... are buffer positions where non-encodable
8815 characters are found.
8816
8817 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8818 value is nil.
8819
8820 START may be a string.  In that case, check if the string is
8821 encodable, and the value contains indices to the string instead of
8822 buffer positions.  END is ignored.
8823
8824 If the current buffer (or START if it is a string) is unibyte, the value
8825 is nil.  */)
8826   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system_list)
8827 {
8828   Lisp_Object list;
8829   EMACS_INT start_byte, end_byte;
8830   int pos;
8831   const unsigned char *p, *pbeg, *pend;
8832   int c;
8833   Lisp_Object tail, elt, attrs;
8834
8835   if (STRINGP (start))
8836     {
8837       if (!STRING_MULTIBYTE (start)
8838           || SCHARS (start) == SBYTES (start))
8839         return Qnil;
8840       start_byte = 0;
8841       end_byte = SBYTES (start);
8842       pos = 0;
8843     }
8844   else
8845     {
8846       CHECK_NUMBER_COERCE_MARKER (start);
8847       CHECK_NUMBER_COERCE_MARKER (end);
8848       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8849         args_out_of_range (start, end);
8850       if (NILP (current_buffer->enable_multibyte_characters))
8851         return Qnil;
8852       start_byte = CHAR_TO_BYTE (XINT (start));
8853       end_byte = CHAR_TO_BYTE (XINT (end));
8854       if (XINT (end) - XINT (start) == end_byte - start_byte)
8855         return Qnil;
8856
8857       if (XINT (start) < GPT && XINT (end) > GPT)
8858         {
8859           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8860             move_gap_both (XINT (start), start_byte);
8861           else
8862             move_gap_both (XINT (end), end_byte);
8863         }
8864       pos = XINT (start);
8865     }
8866
8867   list = Qnil;
8868   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8869     {
8870       elt = XCAR (tail);
8871       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8872       ASET (attrs, coding_attr_trans_tbl,
8873             get_translation_table (attrs, 1, NULL));
8874       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8875     }
8876
8877   if (STRINGP (start))
8878     p = pbeg = SDATA (start);
8879   else
8880     p = pbeg = BYTE_POS_ADDR (start_byte);
8881   pend = p + (end_byte - start_byte);
8882
8883   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8884   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8885
8886   while (p < pend)
8887     {
8888       if (ASCII_BYTE_P (*p))
8889         p++;
8890       else
8891         {
8892           c = STRING_CHAR_ADVANCE (p);
8893
8894           charset_map_loaded = 0;
8895           for (tail = list; CONSP (tail); tail = XCDR (tail))
8896             {
8897               elt = XCDR (XCAR (tail));
8898               if (! char_encodable_p (c, XCAR (elt)))
8899                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8900             }
8901           if (charset_map_loaded)
8902             {
8903               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8904
8905               if (STRINGP (start))
8906                 pbeg = SDATA (start);
8907               else
8908                 pbeg = BYTE_POS_ADDR (start_byte);
8909               p = pbeg + p_offset;
8910               pend = pbeg + pend_offset;
8911             }
8912         }
8913       pos++;
8914     }
8915
8916   tail = list;
8917   list = Qnil;
8918   for (; CONSP (tail); tail = XCDR (tail))
8919     {
8920       elt = XCAR (tail);
8921       if (CONSP (XCDR (XCDR (elt))))
8922         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8923                       list);
8924     }
8925
8926   return list;
8927 }
8928
8929
8930 Lisp_Object
8931 code_convert_region (Lisp_Object start, Lisp_Object end,
8932                      Lisp_Object coding_system, Lisp_Object dst_object,
8933                      int encodep, int norecord)
8934 {
8935   struct coding_system coding;
8936   EMACS_INT from, from_byte, to, to_byte;
8937   Lisp_Object src_object;
8938
8939   CHECK_NUMBER_COERCE_MARKER (start);
8940   CHECK_NUMBER_COERCE_MARKER (end);
8941   if (NILP (coding_system))
8942     coding_system = Qno_conversion;
8943   else
8944     CHECK_CODING_SYSTEM (coding_system);
8945   src_object = Fcurrent_buffer ();
8946   if (NILP (dst_object))
8947     dst_object = src_object;
8948   else if (! EQ (dst_object, Qt))
8949     CHECK_BUFFER (dst_object);
8950
8951   validate_region (&start, &end);
8952   from = XFASTINT (start);
8953   from_byte = CHAR_TO_BYTE (from);
8954   to = XFASTINT (end);
8955   to_byte = CHAR_TO_BYTE (to);
8956
8957   setup_coding_system (coding_system, &coding);
8958   coding.mode |= CODING_MODE_LAST_BLOCK;
8959
8960   if (encodep)
8961     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8962                           dst_object);
8963   else
8964     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8965                           dst_object);
8966   if (! norecord)
8967     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8968
8969   return (BUFFERP (dst_object)
8970           ? make_number (coding.produced_char)
8971           : coding.dst_object);
8972 }
8973
8974
8975 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8976        3, 4, "r\nzCoding system: ",
8977        doc: /* Decode the current region from the specified coding system.
8978 When called from a program, takes four arguments:
8979         START, END, CODING-SYSTEM, and DESTINATION.
8980 START and END are buffer positions.
8981
8982 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8983 If nil, the region between START and END is replaced by the decoded text.
8984 If buffer, the decoded text is inserted in that buffer after point (point
8985 does not move).
8986 In those cases, the length of the decoded text is returned.
8987 If DESTINATION is t, the decoded text is returned.
8988
8989 This function sets `last-coding-system-used' to the precise coding system
8990 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8991 not fully specified.)  */)
8992   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
8993 {
8994   return code_convert_region (start, end, coding_system, destination, 0, 0);
8995 }
8996
8997 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8998        3, 4, "r\nzCoding system: ",
8999        doc: /* Encode the current region by specified coding system.
9000 When called from a program, takes four arguments:
9001         START, END, CODING-SYSTEM and DESTINATION.
9002 START and END are buffer positions.
9003
9004 Optional 4th arguments DESTINATION specifies where the encoded text goes.
9005 If nil, the region between START and END is replace by the encoded text.
9006 If buffer, the encoded text is inserted in that buffer after point (point
9007 does not move).
9008 In those cases, the length of the encoded text is returned.
9009 If DESTINATION is t, the encoded text is returned.
9010
9011 This function sets `last-coding-system-used' to the precise coding system
9012 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9013 not fully specified.)  */)
9014   (Lisp_Object start, Lisp_Object end, Lisp_Object coding_system, Lisp_Object destination)
9015 {
9016   return code_convert_region (start, end, coding_system, destination, 1, 0);
9017 }
9018
9019 Lisp_Object
9020 code_convert_string (string, coding_system, dst_object,
9021                      encodep, nocopy, norecord)
9022      Lisp_Object string, coding_system, dst_object;
9023      int encodep, nocopy, norecord;
9024 {
9025   struct coding_system coding;
9026   EMACS_INT chars, bytes;
9027
9028   CHECK_STRING (string);
9029   if (NILP (coding_system))
9030     {
9031       if (! norecord)
9032         Vlast_coding_system_used = Qno_conversion;
9033       if (NILP (dst_object))
9034         return (nocopy ? Fcopy_sequence (string) : string);
9035     }
9036
9037   if (NILP (coding_system))
9038     coding_system = Qno_conversion;
9039   else
9040     CHECK_CODING_SYSTEM (coding_system);
9041   if (NILP (dst_object))
9042     dst_object = Qt;
9043   else if (! EQ (dst_object, Qt))
9044     CHECK_BUFFER (dst_object);
9045
9046   setup_coding_system (coding_system, &coding);
9047   coding.mode |= CODING_MODE_LAST_BLOCK;
9048   chars = SCHARS (string);
9049   bytes = SBYTES (string);
9050   if (encodep)
9051     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9052   else
9053     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
9054   if (! norecord)
9055     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
9056
9057   return (BUFFERP (dst_object)
9058           ? make_number (coding.produced_char)
9059           : coding.dst_object);
9060 }
9061
9062
9063 /* Encode or decode STRING according to CODING_SYSTEM.
9064    Do not set Vlast_coding_system_used.
9065
9066    This function is called only from macros DECODE_FILE and
9067    ENCODE_FILE, thus we ignore character composition.  */
9068
9069 Lisp_Object
9070 code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
9071                               int encodep)
9072 {
9073   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
9074 }
9075
9076
9077 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
9078        2, 4, 0,
9079        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
9080
9081 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
9082 if the decoding operation is trivial.
9083
9084 Optional fourth arg BUFFER non-nil means that the decoded text is
9085 inserted in that buffer after point (point does not move).  In this
9086 case, the return value is the length of the decoded text.
9087
9088 This function sets `last-coding-system-used' to the precise coding system
9089 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9090 not fully specified.)  */)
9091   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9092 {
9093   return code_convert_string (string, coding_system, buffer,
9094                               0, ! NILP (nocopy), 0);
9095 }
9096
9097 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
9098        2, 4, 0,
9099        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
9100
9101 Optional third arg NOCOPY non-nil means it is OK to return STRING
9102 itself if the encoding operation is trivial.
9103
9104 Optional fourth arg BUFFER non-nil means that the encoded text is
9105 inserted in that buffer after point (point does not move).  In this
9106 case, the return value is the length of the encoded text.
9107
9108 This function sets `last-coding-system-used' to the precise coding system
9109 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
9110 not fully specified.)  */)
9111   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
9112 {
9113   return code_convert_string (string, coding_system, buffer,
9114                               1, ! NILP (nocopy), 1);
9115 }
9116
9117 \f
9118 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
9119        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
9120 Return the corresponding character.  */)
9121   (Lisp_Object code)
9122 {
9123   Lisp_Object spec, attrs, val;
9124   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
9125   int c;
9126
9127   CHECK_NATNUM (code);
9128   c = XFASTINT (code);
9129   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9130   attrs = AREF (spec, 0);
9131
9132   if (ASCII_BYTE_P (c)
9133       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9134     return code;
9135
9136   val = CODING_ATTR_CHARSET_LIST (attrs);
9137   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9138   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9139   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
9140
9141   if (c <= 0x7F)
9142     charset = charset_roman;
9143   else if (c >= 0xA0 && c < 0xDF)
9144     {
9145       charset = charset_kana;
9146       c -= 0x80;
9147     }
9148   else
9149     {
9150       int s1 = c >> 8, s2 = c & 0xFF;
9151
9152       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
9153           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
9154         error ("Invalid code: %d", code);
9155       SJIS_TO_JIS (c);
9156       charset = charset_kanji;
9157     }
9158   c = DECODE_CHAR (charset, c);
9159   if (c < 0)
9160     error ("Invalid code: %d", code);
9161   return make_number (c);
9162 }
9163
9164
9165 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
9166        doc: /* Encode a Japanese character CH to shift_jis encoding.
9167 Return the corresponding code in SJIS.  */)
9168   (Lisp_Object ch)
9169 {
9170   Lisp_Object spec, attrs, charset_list;
9171   int c;
9172   struct charset *charset;
9173   unsigned code;
9174
9175   CHECK_CHARACTER (ch);
9176   c = XFASTINT (ch);
9177   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
9178   attrs = AREF (spec, 0);
9179
9180   if (ASCII_CHAR_P (c)
9181       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9182     return ch;
9183
9184   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9185   charset = char_charset (c, charset_list, &code);
9186   if (code == CHARSET_INVALID_CODE (charset))
9187     error ("Can't encode by shift_jis encoding: %d", c);
9188   JIS_TO_SJIS (code);
9189
9190   return make_number (code);
9191 }
9192
9193 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
9194        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
9195 Return the corresponding character.  */)
9196   (Lisp_Object code)
9197 {
9198   Lisp_Object spec, attrs, val;
9199   struct charset *charset_roman, *charset_big5, *charset;
9200   int c;
9201
9202   CHECK_NATNUM (code);
9203   c = XFASTINT (code);
9204   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9205   attrs = AREF (spec, 0);
9206
9207   if (ASCII_BYTE_P (c)
9208       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9209     return code;
9210
9211   val = CODING_ATTR_CHARSET_LIST (attrs);
9212   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
9213   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
9214
9215   if (c <= 0x7F)
9216     charset = charset_roman;
9217   else
9218     {
9219       int b1 = c >> 8, b2 = c & 0x7F;
9220       if (b1 < 0xA1 || b1 > 0xFE
9221           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
9222         error ("Invalid code: %d", code);
9223       charset = charset_big5;
9224     }
9225   c = DECODE_CHAR (charset, (unsigned )c);
9226   if (c < 0)
9227     error ("Invalid code: %d", code);
9228   return make_number (c);
9229 }
9230
9231 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
9232        doc: /* Encode the Big5 character CH to BIG5 coding system.
9233 Return the corresponding character code in Big5.  */)
9234   (Lisp_Object ch)
9235 {
9236   Lisp_Object spec, attrs, charset_list;
9237   struct charset *charset;
9238   int c;
9239   unsigned code;
9240
9241   CHECK_CHARACTER (ch);
9242   c = XFASTINT (ch);
9243   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
9244   attrs = AREF (spec, 0);
9245   if (ASCII_CHAR_P (c)
9246       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
9247     return ch;
9248
9249   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
9250   charset = char_charset (c, charset_list, &code);
9251   if (code == CHARSET_INVALID_CODE (charset))
9252     error ("Can't encode by Big5 encoding: %d", c);
9253
9254   return make_number (code);
9255 }
9256
9257 \f
9258 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
9259        Sset_terminal_coding_system_internal, 1, 2, 0,
9260        doc: /* Internal use only.  */)
9261   (Lisp_Object coding_system, Lisp_Object terminal)
9262 {
9263   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9264   CHECK_SYMBOL (coding_system);
9265   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
9266   /* We had better not send unsafe characters to terminal.  */
9267   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
9268   /* Characer composition should be disabled.  */
9269   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9270   terminal_coding->src_multibyte = 1;
9271   terminal_coding->dst_multibyte = 0;
9272   return Qnil;
9273 }
9274
9275 DEFUN ("set-safe-terminal-coding-system-internal",
9276        Fset_safe_terminal_coding_system_internal,
9277        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
9278        doc: /* Internal use only.  */)
9279   (Lisp_Object coding_system)
9280 {
9281   CHECK_SYMBOL (coding_system);
9282   setup_coding_system (Fcheck_coding_system (coding_system),
9283                        &safe_terminal_coding);
9284   /* Characer composition should be disabled.  */
9285   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9286   safe_terminal_coding.src_multibyte = 1;
9287   safe_terminal_coding.dst_multibyte = 0;
9288   return Qnil;
9289 }
9290
9291 DEFUN ("terminal-coding-system", Fterminal_coding_system,
9292        Sterminal_coding_system, 0, 1, 0,
9293        doc: /* Return coding system specified for terminal output on the given terminal.
9294 TERMINAL may be a terminal object, a frame, or nil for the selected
9295 frame's terminal device.  */)
9296   (Lisp_Object terminal)
9297 {
9298   struct coding_system *terminal_coding
9299     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
9300   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
9301
9302   /* For backward compatibility, return nil if it is `undecided'. */
9303   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
9304 }
9305
9306 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
9307        Sset_keyboard_coding_system_internal, 1, 2, 0,
9308        doc: /* Internal use only.  */)
9309   (Lisp_Object coding_system, Lisp_Object terminal)
9310 {
9311   struct terminal *t = get_terminal (terminal, 1);
9312   CHECK_SYMBOL (coding_system);
9313   if (NILP (coding_system))
9314     coding_system = Qno_conversion;
9315   else
9316     Fcheck_coding_system (coding_system);
9317   setup_coding_system (coding_system, TERMINAL_KEYBOARD_CODING (t));
9318   /* Characer composition should be disabled.  */
9319   TERMINAL_KEYBOARD_CODING (t)->common_flags
9320     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
9321   return Qnil;
9322 }
9323
9324 DEFUN ("keyboard-coding-system",
9325        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
9326        doc: /* Return coding system specified for decoding keyboard input.  */)
9327   (Lisp_Object terminal)
9328 {
9329   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
9330                          (get_terminal (terminal, 1))->id);
9331 }
9332
9333 \f
9334 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
9335        Sfind_operation_coding_system,  1, MANY, 0,
9336        doc: /* Choose a coding system for an operation based on the target name.
9337 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
9338 DECODING-SYSTEM is the coding system to use for decoding
9339 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
9340 for encoding (in case OPERATION does encoding).
9341
9342 The first argument OPERATION specifies an I/O primitive:
9343   For file I/O, `insert-file-contents' or `write-region'.
9344   For process I/O, `call-process', `call-process-region', or `start-process'.
9345   For network I/O, `open-network-stream'.
9346
9347 The remaining arguments should be the same arguments that were passed
9348 to the primitive.  Depending on which primitive, one of those arguments
9349 is selected as the TARGET.  For example, if OPERATION does file I/O,
9350 whichever argument specifies the file name is TARGET.
9351
9352 TARGET has a meaning which depends on OPERATION:
9353   For file I/O, TARGET is a file name (except for the special case below).
9354   For process I/O, TARGET is a process name.
9355   For network I/O, TARGET is a service name or a port number.
9356
9357 This function looks up what is specified for TARGET in
9358 `file-coding-system-alist', `process-coding-system-alist',
9359 or `network-coding-system-alist' depending on OPERATION.
9360 They may specify a coding system, a cons of coding systems,
9361 or a function symbol to call.
9362 In the last case, we call the function with one argument,
9363 which is a list of all the arguments given to this function.
9364 If the function can't decide a coding system, it can return
9365 `undecided' so that the normal code-detection is performed.
9366
9367 If OPERATION is `insert-file-contents', the argument corresponding to
9368 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
9369 file name to look up, and BUFFER is a buffer that contains the file's
9370 contents (not yet decoded).  If `file-coding-system-alist' specifies a
9371 function to call for FILENAME, that function should examine the
9372 contents of BUFFER instead of reading the file.
9373
9374 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
9375   (int nargs, Lisp_Object *args)
9376 {
9377   Lisp_Object operation, target_idx, target, val;
9378   register Lisp_Object chain;
9379
9380   if (nargs < 2)
9381     error ("Too few arguments");
9382   operation = args[0];
9383   if (!SYMBOLP (operation)
9384       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
9385     error ("Invalid first argument");
9386   if (nargs < 1 + XINT (target_idx))
9387     error ("Too few arguments for operation: %s",
9388            SDATA (SYMBOL_NAME (operation)));
9389   target = args[XINT (target_idx) + 1];
9390   if (!(STRINGP (target)
9391         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
9392             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
9393         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
9394     error ("Invalid %dth argument", XINT (target_idx) + 1);
9395   if (CONSP (target))
9396     target = XCAR (target);
9397
9398   chain = ((EQ (operation, Qinsert_file_contents)
9399             || EQ (operation, Qwrite_region))
9400            ? Vfile_coding_system_alist
9401            : (EQ (operation, Qopen_network_stream)
9402               ? Vnetwork_coding_system_alist
9403               : Vprocess_coding_system_alist));
9404   if (NILP (chain))
9405     return Qnil;
9406
9407   for (; CONSP (chain); chain = XCDR (chain))
9408     {
9409       Lisp_Object elt;
9410
9411       elt = XCAR (chain);
9412       if (CONSP (elt)
9413           && ((STRINGP (target)
9414                && STRINGP (XCAR (elt))
9415                && fast_string_match (XCAR (elt), target) >= 0)
9416               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
9417         {
9418           val = XCDR (elt);
9419           /* Here, if VAL is both a valid coding system and a valid
9420              function symbol, we return VAL as a coding system.  */
9421           if (CONSP (val))
9422             return val;
9423           if (! SYMBOLP (val))
9424             return Qnil;
9425           if (! NILP (Fcoding_system_p (val)))
9426             return Fcons (val, val);
9427           if (! NILP (Ffboundp (val)))
9428             {
9429               /* We use call1 rather than safe_call1
9430                  so as to get bug reports about functions called here
9431                  which don't handle the current interface.  */
9432               val = call1 (val, Flist (nargs, args));
9433               if (CONSP (val))
9434                 return val;
9435               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
9436                 return Fcons (val, val);
9437             }
9438           return Qnil;
9439         }
9440     }
9441   return Qnil;
9442 }
9443
9444 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
9445        Sset_coding_system_priority, 0, MANY, 0,
9446        doc: /* Assign higher priority to the coding systems given as arguments.
9447 If multiple coding systems belong to the same category,
9448 all but the first one are ignored.
9449
9450 usage: (set-coding-system-priority &rest coding-systems)  */)
9451   (int nargs, Lisp_Object *args)
9452 {
9453   int i, j;
9454   int changed[coding_category_max];
9455   enum coding_category priorities[coding_category_max];
9456
9457   memset (changed, 0, sizeof changed);
9458
9459   for (i = j = 0; i < nargs; i++)
9460     {
9461       enum coding_category category;
9462       Lisp_Object spec, attrs;
9463
9464       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
9465       attrs = AREF (spec, 0);
9466       category = XINT (CODING_ATTR_CATEGORY (attrs));
9467       if (changed[category])
9468         /* Ignore this coding system because a coding system of the
9469            same category already had a higher priority.  */
9470         continue;
9471       changed[category] = 1;
9472       priorities[j++] = category;
9473       if (coding_categories[category].id >= 0
9474           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
9475         setup_coding_system (args[i], &coding_categories[category]);
9476       Fset (AREF (Vcoding_category_table, category), args[i]);
9477     }
9478
9479   /* Now we have decided top J priorities.  Reflect the order of the
9480      original priorities to the remaining priorities.  */
9481
9482   for (i = j, j = 0; i < coding_category_max; i++, j++)
9483     {
9484       while (j < coding_category_max
9485              && changed[coding_priorities[j]])
9486         j++;
9487       if (j == coding_category_max)
9488         abort ();
9489       priorities[i] = coding_priorities[j];
9490     }
9491
9492   memcpy (coding_priorities, priorities, sizeof priorities);
9493
9494   /* Update `coding-category-list'.  */
9495   Vcoding_category_list = Qnil;
9496   for (i = coding_category_max - 1; i >= 0; i--)
9497     Vcoding_category_list
9498       = Fcons (AREF (Vcoding_category_table, priorities[i]),
9499                Vcoding_category_list);
9500
9501   return Qnil;
9502 }
9503
9504 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
9505        Scoding_system_priority_list, 0, 1, 0,
9506        doc: /* Return a list of coding systems ordered by their priorities.
9507 The list contains a subset of coding systems; i.e. coding systems
9508 assigned to each coding category (see `coding-category-list').
9509
9510 HIGHESTP non-nil means just return the highest priority one.  */)
9511   (Lisp_Object highestp)
9512 {
9513   int i;
9514   Lisp_Object val;
9515
9516   for (i = 0, val = Qnil; i < coding_category_max; i++)
9517     {
9518       enum coding_category category = coding_priorities[i];
9519       int id = coding_categories[category].id;
9520       Lisp_Object attrs;
9521
9522       if (id < 0)
9523         continue;
9524       attrs = CODING_ID_ATTRS (id);
9525       if (! NILP (highestp))
9526         return CODING_ATTR_BASE_NAME (attrs);
9527       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
9528     }
9529   return Fnreverse (val);
9530 }
9531
9532 static const char *const suffixes[] = { "-unix", "-dos", "-mac" };
9533
9534 static Lisp_Object
9535 make_subsidiaries (Lisp_Object base)
9536 {
9537   Lisp_Object subsidiaries;
9538   int base_name_len = SBYTES (SYMBOL_NAME (base));
9539   char *buf = (char *) alloca (base_name_len + 6);
9540   int i;
9541
9542   memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
9543   subsidiaries = Fmake_vector (make_number (3), Qnil);
9544   for (i = 0; i < 3; i++)
9545     {
9546       memcpy (buf + base_name_len, suffixes[i], strlen (suffixes[i]) + 1);
9547       ASET (subsidiaries, i, intern (buf));
9548     }
9549   return subsidiaries;
9550 }
9551
9552
9553 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
9554        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
9555        doc: /* For internal use only.
9556 usage: (define-coding-system-internal ...)  */)
9557   (int nargs, Lisp_Object *args)
9558 {
9559   Lisp_Object name;
9560   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
9561   Lisp_Object attrs;            /* Vector of attributes.  */
9562   Lisp_Object eol_type;
9563   Lisp_Object aliases;
9564   Lisp_Object coding_type, charset_list, safe_charsets;
9565   enum coding_category category;
9566   Lisp_Object tail, val;
9567   int max_charset_id = 0;
9568   int i;
9569
9570   if (nargs < coding_arg_max)
9571     goto short_args;
9572
9573   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
9574
9575   name = args[coding_arg_name];
9576   CHECK_SYMBOL (name);
9577   CODING_ATTR_BASE_NAME (attrs) = name;
9578
9579   val = args[coding_arg_mnemonic];
9580   if (! STRINGP (val))
9581     CHECK_CHARACTER (val);
9582   CODING_ATTR_MNEMONIC (attrs) = val;
9583
9584   coding_type = args[coding_arg_coding_type];
9585   CHECK_SYMBOL (coding_type);
9586   CODING_ATTR_TYPE (attrs) = coding_type;
9587
9588   charset_list = args[coding_arg_charset_list];
9589   if (SYMBOLP (charset_list))
9590     {
9591       if (EQ (charset_list, Qiso_2022))
9592         {
9593           if (! EQ (coding_type, Qiso_2022))
9594             error ("Invalid charset-list");
9595           charset_list = Viso_2022_charset_list;
9596         }
9597       else if (EQ (charset_list, Qemacs_mule))
9598         {
9599           if (! EQ (coding_type, Qemacs_mule))
9600             error ("Invalid charset-list");
9601           charset_list = Vemacs_mule_charset_list;
9602         }
9603       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9604         if (max_charset_id < XFASTINT (XCAR (tail)))
9605           max_charset_id = XFASTINT (XCAR (tail));
9606     }
9607   else
9608     {
9609       charset_list = Fcopy_sequence (charset_list);
9610       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9611         {
9612           struct charset *charset;
9613
9614           val = XCAR (tail);
9615           CHECK_CHARSET_GET_CHARSET (val, charset);
9616           if (EQ (coding_type, Qiso_2022)
9617               ? CHARSET_ISO_FINAL (charset) < 0
9618               : EQ (coding_type, Qemacs_mule)
9619               ? CHARSET_EMACS_MULE_ID (charset) < 0
9620               : 0)
9621             error ("Can't handle charset `%s'",
9622                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9623
9624           XSETCAR (tail, make_number (charset->id));
9625           if (max_charset_id < charset->id)
9626             max_charset_id = charset->id;
9627         }
9628     }
9629   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
9630
9631   safe_charsets = make_uninit_string (max_charset_id + 1);
9632   memset (SDATA (safe_charsets), 255, max_charset_id + 1);
9633   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9634     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
9635   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
9636
9637   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
9638
9639   val = args[coding_arg_decode_translation_table];
9640   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9641     CHECK_SYMBOL (val);
9642   CODING_ATTR_DECODE_TBL (attrs) = val;
9643
9644   val = args[coding_arg_encode_translation_table];
9645   if (! CHAR_TABLE_P (val) && ! CONSP (val))
9646     CHECK_SYMBOL (val);
9647   CODING_ATTR_ENCODE_TBL (attrs) = val;
9648
9649   val = args[coding_arg_post_read_conversion];
9650   CHECK_SYMBOL (val);
9651   CODING_ATTR_POST_READ (attrs) = val;
9652
9653   val = args[coding_arg_pre_write_conversion];
9654   CHECK_SYMBOL (val);
9655   CODING_ATTR_PRE_WRITE (attrs) = val;
9656
9657   val = args[coding_arg_default_char];
9658   if (NILP (val))
9659     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
9660   else
9661     {
9662       CHECK_CHARACTER (val);
9663       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9664     }
9665
9666   val = args[coding_arg_for_unibyte];
9667   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
9668
9669   val = args[coding_arg_plist];
9670   CHECK_LIST (val);
9671   CODING_ATTR_PLIST (attrs) = val;
9672
9673   if (EQ (coding_type, Qcharset))
9674     {
9675       /* Generate a lisp vector of 256 elements.  Each element is nil,
9676          integer, or a list of charset IDs.
9677
9678          If Nth element is nil, the byte code N is invalid in this
9679          coding system.
9680
9681          If Nth element is a number NUM, N is the first byte of a
9682          charset whose ID is NUM.
9683
9684          If Nth element is a list of charset IDs, N is the first byte
9685          of one of them.  The list is sorted by dimensions of the
9686          charsets.  A charset of smaller dimension comes firtst. */
9687       val = Fmake_vector (make_number (256), Qnil);
9688
9689       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
9690         {
9691           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
9692           int dim = CHARSET_DIMENSION (charset);
9693           int idx = (dim - 1) * 4;
9694
9695           if (CHARSET_ASCII_COMPATIBLE_P (charset))
9696             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9697
9698           for (i = charset->code_space[idx];
9699                i <= charset->code_space[idx + 1]; i++)
9700             {
9701               Lisp_Object tmp, tmp2;
9702               int dim2;
9703
9704               tmp = AREF (val, i);
9705               if (NILP (tmp))
9706                 tmp = XCAR (tail);
9707               else if (NUMBERP (tmp))
9708                 {
9709                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
9710                   if (dim < dim2)
9711                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
9712                   else
9713                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
9714                 }
9715               else
9716                 {
9717                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
9718                     {
9719                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
9720                       if (dim < dim2)
9721                         break;
9722                     }
9723                   if (NILP (tmp2))
9724                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
9725                   else
9726                     {
9727                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
9728                       XSETCAR (tmp2, XCAR (tail));
9729                     }
9730                 }
9731               ASET (val, i, tmp);
9732             }
9733         }
9734       ASET (attrs, coding_attr_charset_valids, val);
9735       category = coding_category_charset;
9736     }
9737   else if (EQ (coding_type, Qccl))
9738     {
9739       Lisp_Object valids;
9740
9741       if (nargs < coding_arg_ccl_max)
9742         goto short_args;
9743
9744       val = args[coding_arg_ccl_decoder];
9745       CHECK_CCL_PROGRAM (val);
9746       if (VECTORP (val))
9747         val = Fcopy_sequence (val);
9748       ASET (attrs, coding_attr_ccl_decoder, val);
9749
9750       val = args[coding_arg_ccl_encoder];
9751       CHECK_CCL_PROGRAM (val);
9752       if (VECTORP (val))
9753         val = Fcopy_sequence (val);
9754       ASET (attrs, coding_attr_ccl_encoder, val);
9755
9756       val = args[coding_arg_ccl_valids];
9757       valids = Fmake_string (make_number (256), make_number (0));
9758       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9759         {
9760           int from, to;
9761
9762           val = Fcar (tail);
9763           if (INTEGERP (val))
9764             {
9765               from = to = XINT (val);
9766               if (from < 0 || from > 255)
9767                 args_out_of_range_3 (val, make_number (0), make_number (255));
9768             }
9769           else
9770             {
9771               CHECK_CONS (val);
9772               CHECK_NATNUM_CAR (val);
9773               CHECK_NATNUM_CDR (val);
9774               from = XINT (XCAR (val));
9775               if (from > 255)
9776                 args_out_of_range_3 (XCAR (val),
9777                                      make_number (0), make_number (255));
9778               to = XINT (XCDR (val));
9779               if (to < from || to > 255)
9780                 args_out_of_range_3 (XCDR (val),
9781                                      XCAR (val), make_number (255));
9782             }
9783           for (i = from; i <= to; i++)
9784             SSET (valids, i, 1);
9785         }
9786       ASET (attrs, coding_attr_ccl_valids, valids);
9787
9788       category = coding_category_ccl;
9789     }
9790   else if (EQ (coding_type, Qutf_16))
9791     {
9792       Lisp_Object bom, endian;
9793
9794       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9795
9796       if (nargs < coding_arg_utf16_max)
9797         goto short_args;
9798
9799       bom = args[coding_arg_utf16_bom];
9800       if (! NILP (bom) && ! EQ (bom, Qt))
9801         {
9802           CHECK_CONS (bom);
9803           val = XCAR (bom);
9804           CHECK_CODING_SYSTEM (val);
9805           val = XCDR (bom);
9806           CHECK_CODING_SYSTEM (val);
9807         }
9808       ASET (attrs, coding_attr_utf_bom, bom);
9809
9810       endian = args[coding_arg_utf16_endian];
9811       CHECK_SYMBOL (endian);
9812       if (NILP (endian))
9813         endian = Qbig;
9814       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9815         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9816       ASET (attrs, coding_attr_utf_16_endian, endian);
9817
9818       category = (CONSP (bom)
9819                   ? coding_category_utf_16_auto
9820                   : NILP (bom)
9821                   ? (EQ (endian, Qbig)
9822                      ? coding_category_utf_16_be_nosig
9823                      : coding_category_utf_16_le_nosig)
9824                   : (EQ (endian, Qbig)
9825                      ? coding_category_utf_16_be
9826                      : coding_category_utf_16_le));
9827     }
9828   else if (EQ (coding_type, Qiso_2022))
9829     {
9830       Lisp_Object initial, reg_usage, request, flags;
9831       int i;
9832
9833       if (nargs < coding_arg_iso2022_max)
9834         goto short_args;
9835
9836       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9837       CHECK_VECTOR (initial);
9838       for (i = 0; i < 4; i++)
9839         {
9840           val = Faref (initial, make_number (i));
9841           if (! NILP (val))
9842             {
9843               struct charset *charset;
9844
9845               CHECK_CHARSET_GET_CHARSET (val, charset);
9846               ASET (initial, i, make_number (CHARSET_ID (charset)));
9847               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9848                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9849             }
9850           else
9851             ASET (initial, i, make_number (-1));
9852         }
9853
9854       reg_usage = args[coding_arg_iso2022_reg_usage];
9855       CHECK_CONS (reg_usage);
9856       CHECK_NUMBER_CAR (reg_usage);
9857       CHECK_NUMBER_CDR (reg_usage);
9858
9859       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9860       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9861         {
9862           int id;
9863           Lisp_Object tmp;
9864
9865           val = Fcar (tail);
9866           CHECK_CONS (val);
9867           tmp = XCAR (val);
9868           CHECK_CHARSET_GET_ID (tmp, id);
9869           CHECK_NATNUM_CDR (val);
9870           if (XINT (XCDR (val)) >= 4)
9871             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9872           XSETCAR (val, make_number (id));
9873         }
9874
9875       flags = args[coding_arg_iso2022_flags];
9876       CHECK_NATNUM (flags);
9877       i = XINT (flags);
9878       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9879         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9880
9881       ASET (attrs, coding_attr_iso_initial, initial);
9882       ASET (attrs, coding_attr_iso_usage, reg_usage);
9883       ASET (attrs, coding_attr_iso_request, request);
9884       ASET (attrs, coding_attr_iso_flags, flags);
9885       setup_iso_safe_charsets (attrs);
9886
9887       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9888         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9889                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9890                     ? coding_category_iso_7_else
9891                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9892                     ? coding_category_iso_7
9893                     : coding_category_iso_7_tight);
9894       else
9895         {
9896           int id = XINT (AREF (initial, 1));
9897
9898           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9899                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9900                        || id < 0)
9901                       ? coding_category_iso_8_else
9902                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9903                       ? coding_category_iso_8_1
9904                       : coding_category_iso_8_2);
9905         }
9906       if (category != coding_category_iso_8_1
9907           && category != coding_category_iso_8_2)
9908         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9909     }
9910   else if (EQ (coding_type, Qemacs_mule))
9911     {
9912       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9913         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9914       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9915       category = coding_category_emacs_mule;
9916     }
9917   else if (EQ (coding_type, Qshift_jis))
9918     {
9919
9920       struct charset *charset;
9921
9922       if (XINT (Flength (charset_list)) != 3
9923           && XINT (Flength (charset_list)) != 4)
9924         error ("There should be three or four charsets");
9925
9926       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9927       if (CHARSET_DIMENSION (charset) != 1)
9928         error ("Dimension of charset %s is not one",
9929                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9930       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9931         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9932
9933       charset_list = XCDR (charset_list);
9934       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9935       if (CHARSET_DIMENSION (charset) != 1)
9936         error ("Dimension of charset %s is not one",
9937                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9938
9939       charset_list = XCDR (charset_list);
9940       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9941       if (CHARSET_DIMENSION (charset) != 2)
9942         error ("Dimension of charset %s is not two",
9943                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9944
9945       charset_list = XCDR (charset_list);
9946       if (! NILP (charset_list))
9947         {
9948           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9949           if (CHARSET_DIMENSION (charset) != 2)
9950             error ("Dimension of charset %s is not two",
9951                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9952         }
9953
9954       category = coding_category_sjis;
9955       Vsjis_coding_system = name;
9956     }
9957   else if (EQ (coding_type, Qbig5))
9958     {
9959       struct charset *charset;
9960
9961       if (XINT (Flength (charset_list)) != 2)
9962         error ("There should be just two charsets");
9963
9964       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9965       if (CHARSET_DIMENSION (charset) != 1)
9966         error ("Dimension of charset %s is not one",
9967                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9968       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9969         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9970
9971       charset_list = XCDR (charset_list);
9972       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9973       if (CHARSET_DIMENSION (charset) != 2)
9974         error ("Dimension of charset %s is not two",
9975                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9976
9977       category = coding_category_big5;
9978       Vbig5_coding_system = name;
9979     }
9980   else if (EQ (coding_type, Qraw_text))
9981     {
9982       category = coding_category_raw_text;
9983       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9984     }
9985   else if (EQ (coding_type, Qutf_8))
9986     {
9987       Lisp_Object bom;
9988
9989       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9990
9991       if (nargs < coding_arg_utf8_max)
9992         goto short_args;
9993
9994       bom = args[coding_arg_utf8_bom];
9995       if (! NILP (bom) && ! EQ (bom, Qt))
9996         {
9997           CHECK_CONS (bom);
9998           val = XCAR (bom);
9999           CHECK_CODING_SYSTEM (val);
10000           val = XCDR (bom);
10001           CHECK_CODING_SYSTEM (val);
10002         }
10003       ASET (attrs, coding_attr_utf_bom, bom);
10004
10005       category = (CONSP (bom) ? coding_category_utf_8_auto
10006                   : NILP (bom) ? coding_category_utf_8_nosig
10007                   : coding_category_utf_8_sig);
10008     }
10009   else if (EQ (coding_type, Qundecided))
10010     category = coding_category_undecided;
10011   else
10012     error ("Invalid coding system type: %s",
10013            SDATA (SYMBOL_NAME (coding_type)));
10014
10015   CODING_ATTR_CATEGORY (attrs) = make_number (category);
10016   CODING_ATTR_PLIST (attrs)
10017     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
10018                                 CODING_ATTR_PLIST (attrs)));
10019   CODING_ATTR_PLIST (attrs)
10020     = Fcons (QCascii_compatible_p,
10021              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
10022                     CODING_ATTR_PLIST (attrs)));
10023
10024   eol_type = args[coding_arg_eol_type];
10025   if (! NILP (eol_type)
10026       && ! EQ (eol_type, Qunix)
10027       && ! EQ (eol_type, Qdos)
10028       && ! EQ (eol_type, Qmac))
10029     error ("Invalid eol-type");
10030
10031   aliases = Fcons (name, Qnil);
10032
10033   if (NILP (eol_type))
10034     {
10035       eol_type = make_subsidiaries (name);
10036       for (i = 0; i < 3; i++)
10037         {
10038           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
10039
10040           this_name = AREF (eol_type, i);
10041           this_aliases = Fcons (this_name, Qnil);
10042           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
10043           this_spec = Fmake_vector (make_number (3), attrs);
10044           ASET (this_spec, 1, this_aliases);
10045           ASET (this_spec, 2, this_eol_type);
10046           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
10047           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
10048           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
10049           if (NILP (val))
10050             Vcoding_system_alist
10051               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
10052                        Vcoding_system_alist);
10053         }
10054     }
10055
10056   spec_vec = Fmake_vector (make_number (3), attrs);
10057   ASET (spec_vec, 1, aliases);
10058   ASET (spec_vec, 2, eol_type);
10059
10060   Fputhash (name, spec_vec, Vcoding_system_hash_table);
10061   Vcoding_system_list = Fcons (name, Vcoding_system_list);
10062   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
10063   if (NILP (val))
10064     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
10065                                   Vcoding_system_alist);
10066
10067   {
10068     int id = coding_categories[category].id;
10069
10070     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
10071       setup_coding_system (name, &coding_categories[category]);
10072   }
10073
10074   return Qnil;
10075
10076  short_args:
10077   return Fsignal (Qwrong_number_of_arguments,
10078                   Fcons (intern ("define-coding-system-internal"),
10079                          make_number (nargs)));
10080 }
10081
10082
10083 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
10084        3, 3, 0,
10085        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
10086   (Lisp_Object coding_system, Lisp_Object prop, Lisp_Object val)
10087 {
10088   Lisp_Object spec, attrs;
10089
10090   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10091   attrs = AREF (spec, 0);
10092   if (EQ (prop, QCmnemonic))
10093     {
10094       if (! STRINGP (val))
10095         CHECK_CHARACTER (val);
10096       CODING_ATTR_MNEMONIC (attrs) = val;
10097     }
10098   else if (EQ (prop, QCdefault_char))
10099     {
10100       if (NILP (val))
10101         val = make_number (' ');
10102       else
10103         CHECK_CHARACTER (val);
10104       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
10105     }
10106   else if (EQ (prop, QCdecode_translation_table))
10107     {
10108       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10109         CHECK_SYMBOL (val);
10110       CODING_ATTR_DECODE_TBL (attrs) = val;
10111     }
10112   else if (EQ (prop, QCencode_translation_table))
10113     {
10114       if (! CHAR_TABLE_P (val) && ! CONSP (val))
10115         CHECK_SYMBOL (val);
10116       CODING_ATTR_ENCODE_TBL (attrs) = val;
10117     }
10118   else if (EQ (prop, QCpost_read_conversion))
10119     {
10120       CHECK_SYMBOL (val);
10121       CODING_ATTR_POST_READ (attrs) = val;
10122     }
10123   else if (EQ (prop, QCpre_write_conversion))
10124     {
10125       CHECK_SYMBOL (val);
10126       CODING_ATTR_PRE_WRITE (attrs) = val;
10127     }
10128   else if (EQ (prop, QCascii_compatible_p))
10129     {
10130       CODING_ATTR_ASCII_COMPAT (attrs) = val;
10131     }
10132
10133   CODING_ATTR_PLIST (attrs)
10134     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
10135   return val;
10136 }
10137
10138
10139 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
10140        Sdefine_coding_system_alias, 2, 2, 0,
10141        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
10142   (Lisp_Object alias, Lisp_Object coding_system)
10143 {
10144   Lisp_Object spec, aliases, eol_type, val;
10145
10146   CHECK_SYMBOL (alias);
10147   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10148   aliases = AREF (spec, 1);
10149   /* ALIASES should be a list of length more than zero, and the first
10150      element is a base coding system.  Append ALIAS at the tail of the
10151      list.  */
10152   while (!NILP (XCDR (aliases)))
10153     aliases = XCDR (aliases);
10154   XSETCDR (aliases, Fcons (alias, Qnil));
10155
10156   eol_type = AREF (spec, 2);
10157   if (VECTORP (eol_type))
10158     {
10159       Lisp_Object subsidiaries;
10160       int i;
10161
10162       subsidiaries = make_subsidiaries (alias);
10163       for (i = 0; i < 3; i++)
10164         Fdefine_coding_system_alias (AREF (subsidiaries, i),
10165                                      AREF (eol_type, i));
10166     }
10167
10168   Fputhash (alias, spec, Vcoding_system_hash_table);
10169   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
10170   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
10171   if (NILP (val))
10172     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
10173                                   Vcoding_system_alist);
10174
10175   return Qnil;
10176 }
10177
10178 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
10179        1, 1, 0,
10180        doc: /* Return the base of CODING-SYSTEM.
10181 Any alias or subsidiary coding system is not a base coding system.  */)
10182   (Lisp_Object coding_system)
10183 {
10184   Lisp_Object spec, attrs;
10185
10186   if (NILP (coding_system))
10187     return (Qno_conversion);
10188   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10189   attrs = AREF (spec, 0);
10190   return CODING_ATTR_BASE_NAME (attrs);
10191 }
10192
10193 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
10194        1, 1, 0,
10195        doc: "Return the property list of CODING-SYSTEM.")
10196   (Lisp_Object coding_system)
10197 {
10198   Lisp_Object spec, attrs;
10199
10200   if (NILP (coding_system))
10201     coding_system = Qno_conversion;
10202   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10203   attrs = AREF (spec, 0);
10204   return CODING_ATTR_PLIST (attrs);
10205 }
10206
10207
10208 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
10209        1, 1, 0,
10210        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
10211   (Lisp_Object coding_system)
10212 {
10213   Lisp_Object spec;
10214
10215   if (NILP (coding_system))
10216     coding_system = Qno_conversion;
10217   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
10218   return AREF (spec, 1);
10219 }
10220
10221 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
10222        Scoding_system_eol_type, 1, 1, 0,
10223        doc: /* Return eol-type of CODING-SYSTEM.
10224 An eol-type is an integer 0, 1, 2, or a vector of coding systems.
10225
10226 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
10227 and CR respectively.
10228
10229 A vector value indicates that a format of end-of-line should be
10230 detected automatically.  Nth element of the vector is the subsidiary
10231 coding system whose eol-type is N.  */)
10232   (Lisp_Object coding_system)
10233 {
10234   Lisp_Object spec, eol_type;
10235   int n;
10236
10237   if (NILP (coding_system))
10238     coding_system = Qno_conversion;
10239   if (! CODING_SYSTEM_P (coding_system))
10240     return Qnil;
10241   spec = CODING_SYSTEM_SPEC (coding_system);
10242   eol_type = AREF (spec, 2);
10243   if (VECTORP (eol_type))
10244     return Fcopy_sequence (eol_type);
10245   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
10246   return make_number (n);
10247 }
10248
10249 #endif /* emacs */
10250
10251 \f
/*** 12. Postamble ***/
10253
10254 void
10255 init_coding_once (void)
10256 {
10257   int i;
10258
10259   for (i = 0; i < coding_category_max; i++)
10260     {
10261       coding_categories[i].id = -1;
10262       coding_priorities[i] = i;
10263     }
10264
10265   /* ISO2022 specific initialize routine.  */
10266   for (i = 0; i < 0x20; i++)
10267     iso_code_class[i] = ISO_control_0;
10268   for (i = 0x21; i < 0x7F; i++)
10269     iso_code_class[i] = ISO_graphic_plane_0;
10270   for (i = 0x80; i < 0xA0; i++)
10271     iso_code_class[i] = ISO_control_1;
10272   for (i = 0xA1; i < 0xFF; i++)
10273     iso_code_class[i] = ISO_graphic_plane_1;
10274   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
10275   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
10276   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
10277   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
10278   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
10279   iso_code_class[ISO_CODE_ESC] = ISO_escape;
10280   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
10281   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
10282   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
10283
10284   for (i = 0; i < 256; i++)
10285     {
10286       emacs_mule_bytes[i] = 1;
10287     }
10288   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
10289   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
10290   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
10291   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
10292 }
10293
10294 #ifdef emacs
10295
10296 void
10297 syms_of_coding (void)
10298 {
10299   staticpro (&Vcoding_system_hash_table);
10300   {
10301     Lisp_Object args[2];
10302     args[0] = QCtest;
10303     args[1] = Qeq;
10304     Vcoding_system_hash_table = Fmake_hash_table (2, args);
10305   }
10306
10307   staticpro (&Vsjis_coding_system);
10308   Vsjis_coding_system = Qnil;
10309
10310   staticpro (&Vbig5_coding_system);
10311   Vbig5_coding_system = Qnil;
10312
10313   staticpro (&Vcode_conversion_reused_workbuf);
10314   Vcode_conversion_reused_workbuf = Qnil;
10315
10316   staticpro (&Vcode_conversion_workbuf_name);
10317   Vcode_conversion_workbuf_name = make_pure_c_string (" *code-conversion-work*");
10318
10319   reused_workbuf_in_use = 0;
10320
10321   DEFSYM (Qcharset, "charset");
10322   DEFSYM (Qtarget_idx, "target-idx");
10323   DEFSYM (Qcoding_system_history, "coding-system-history");
10324   Fset (Qcoding_system_history, Qnil);
10325
10326   /* Target FILENAME is the first argument.  */
10327   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
10328   /* Target FILENAME is the third argument.  */
10329   Fput (Qwrite_region, Qtarget_idx, make_number (2));
10330
10331   DEFSYM (Qcall_process, "call-process");
10332   /* Target PROGRAM is the first argument.  */
10333   Fput (Qcall_process, Qtarget_idx, make_number (0));
10334
10335   DEFSYM (Qcall_process_region, "call-process-region");
10336   /* Target PROGRAM is the third argument.  */
10337   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
10338
10339   DEFSYM (Qstart_process, "start-process");
10340   /* Target PROGRAM is the third argument.  */
10341   Fput (Qstart_process, Qtarget_idx, make_number (2));
10342
10343   DEFSYM (Qopen_network_stream, "open-network-stream");
10344   /* Target SERVICE is the fourth argument.  */
10345   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
10346
10347   DEFSYM (Qcoding_system, "coding-system");
10348   DEFSYM (Qcoding_aliases, "coding-aliases");
10349
10350   DEFSYM (Qeol_type, "eol-type");
10351   DEFSYM (Qunix, "unix");
10352   DEFSYM (Qdos, "dos");
10353
10354   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
10355   DEFSYM (Qpost_read_conversion, "post-read-conversion");
10356   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
10357   DEFSYM (Qdefault_char, "default-char");
10358   DEFSYM (Qundecided, "undecided");
10359   DEFSYM (Qno_conversion, "no-conversion");
10360   DEFSYM (Qraw_text, "raw-text");
10361
10362   DEFSYM (Qiso_2022, "iso-2022");
10363
10364   DEFSYM (Qutf_8, "utf-8");
10365   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
10366
10367   DEFSYM (Qutf_16, "utf-16");
10368   DEFSYM (Qbig, "big");
10369   DEFSYM (Qlittle, "little");
10370
10371   DEFSYM (Qshift_jis, "shift-jis");
10372   DEFSYM (Qbig5, "big5");
10373
10374   DEFSYM (Qcoding_system_p, "coding-system-p");
10375
10376   DEFSYM (Qcoding_system_error, "coding-system-error");
10377   Fput (Qcoding_system_error, Qerror_conditions,
10378         pure_cons (Qcoding_system_error, pure_cons (Qerror, Qnil)));
10379   Fput (Qcoding_system_error, Qerror_message,
10380         make_pure_c_string ("Invalid coding system"));
10381
10382   /* Intern this now in case it isn't already done.
10383      Setting this variable twice is harmless.
10384      But don't staticpro it here--that is done in alloc.c.  */
10385   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
10386
10387   DEFSYM (Qtranslation_table, "translation-table");
10388   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
10389   DEFSYM (Qtranslation_table_id, "translation-table-id");
10390   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
10391   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
10392
10393   DEFSYM (Qvalid_codes, "valid-codes");
10394
10395   DEFSYM (Qemacs_mule, "emacs-mule");
10396
10397   DEFSYM (QCcategory, ":category");
10398   DEFSYM (QCmnemonic, ":mnemonic");
10399   DEFSYM (QCdefault_char, ":default-char");
10400   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
10401   DEFSYM (QCencode_translation_table, ":encode-translation-table");
10402   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
10403   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
10404   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
10405
10406   Vcoding_category_table
10407     = Fmake_vector (make_number (coding_category_max), Qnil);
10408   staticpro (&Vcoding_category_table);
10409   /* Followings are target of code detection.  */
10410   ASET (Vcoding_category_table, coding_category_iso_7,
10411         intern_c_string ("coding-category-iso-7"));
10412   ASET (Vcoding_category_table, coding_category_iso_7_tight,
10413         intern_c_string ("coding-category-iso-7-tight"));
10414   ASET (Vcoding_category_table, coding_category_iso_8_1,
10415         intern_c_string ("coding-category-iso-8-1"));
10416   ASET (Vcoding_category_table, coding_category_iso_8_2,
10417         intern_c_string ("coding-category-iso-8-2"));
10418   ASET (Vcoding_category_table, coding_category_iso_7_else,
10419         intern_c_string ("coding-category-iso-7-else"));
10420   ASET (Vcoding_category_table, coding_category_iso_8_else,
10421         intern_c_string ("coding-category-iso-8-else"));
10422   ASET (Vcoding_category_table, coding_category_utf_8_auto,
10423         intern_c_string ("coding-category-utf-8-auto"));
10424   ASET (Vcoding_category_table, coding_category_utf_8_nosig,
10425         intern_c_string ("coding-category-utf-8"));
10426   ASET (Vcoding_category_table, coding_category_utf_8_sig,
10427         intern_c_string ("coding-category-utf-8-sig"));
10428   ASET (Vcoding_category_table, coding_category_utf_16_be,
10429         intern_c_string ("coding-category-utf-16-be"));
10430   ASET (Vcoding_category_table, coding_category_utf_16_auto,
10431         intern_c_string ("coding-category-utf-16-auto"));
10432   ASET (Vcoding_category_table, coding_category_utf_16_le,
10433         intern_c_string ("coding-category-utf-16-le"));
10434   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
10435         intern_c_string ("coding-category-utf-16-be-nosig"));
10436   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
10437         intern_c_string ("coding-category-utf-16-le-nosig"));
10438   ASET (Vcoding_category_table, coding_category_charset,
10439         intern_c_string ("coding-category-charset"));
10440   ASET (Vcoding_category_table, coding_category_sjis,
10441         intern_c_string ("coding-category-sjis"));
10442   ASET (Vcoding_category_table, coding_category_big5,
10443         intern_c_string ("coding-category-big5"));
10444   ASET (Vcoding_category_table, coding_category_ccl,
10445         intern_c_string ("coding-category-ccl"));
10446   ASET (Vcoding_category_table, coding_category_emacs_mule,
10447         intern_c_string ("coding-category-emacs-mule"));
10448   /* Followings are NOT target of code detection.  */
10449   ASET (Vcoding_category_table, coding_category_raw_text,
10450         intern_c_string ("coding-category-raw-text"));
10451   ASET (Vcoding_category_table, coding_category_undecided,
10452         intern_c_string ("coding-category-undecided"));
10453
10454   DEFSYM (Qinsufficient_source, "insufficient-source");
10455   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
10456   DEFSYM (Qinvalid_source, "invalid-source");
10457   DEFSYM (Qinterrupted, "interrupted");
10458   DEFSYM (Qinsufficient_memory, "insufficient-memory");
10459   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
10460
10461   defsubr (&Scoding_system_p);
10462   defsubr (&Sread_coding_system);
10463   defsubr (&Sread_non_nil_coding_system);
10464   defsubr (&Scheck_coding_system);
10465   defsubr (&Sdetect_coding_region);
10466   defsubr (&Sdetect_coding_string);
10467   defsubr (&Sfind_coding_systems_region_internal);
10468   defsubr (&Sunencodable_char_position);
10469   defsubr (&Scheck_coding_systems_region);
10470   defsubr (&Sdecode_coding_region);
10471   defsubr (&Sencode_coding_region);
10472   defsubr (&Sdecode_coding_string);
10473   defsubr (&Sencode_coding_string);
10474   defsubr (&Sdecode_sjis_char);
10475   defsubr (&Sencode_sjis_char);
10476   defsubr (&Sdecode_big5_char);
10477   defsubr (&Sencode_big5_char);
10478   defsubr (&Sset_terminal_coding_system_internal);
10479   defsubr (&Sset_safe_terminal_coding_system_internal);
10480   defsubr (&Sterminal_coding_system);
10481   defsubr (&Sset_keyboard_coding_system_internal);
10482   defsubr (&Skeyboard_coding_system);
10483   defsubr (&Sfind_operation_coding_system);
10484   defsubr (&Sset_coding_system_priority);
10485   defsubr (&Sdefine_coding_system_internal);
10486   defsubr (&Sdefine_coding_system_alias);
10487   defsubr (&Scoding_system_put);
10488   defsubr (&Scoding_system_base);
10489   defsubr (&Scoding_system_plist);
10490   defsubr (&Scoding_system_aliases);
10491   defsubr (&Scoding_system_eol_type);
10492   defsubr (&Scoding_system_priority_list);
10493
10494   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
10495                doc: /* List of coding systems.
10496
10497 Do not alter the value of this variable manually.  This variable should be
10498 updated by the functions `define-coding-system' and
10499 `define-coding-system-alias'.  */);
10500   Vcoding_system_list = Qnil;
10501
10502   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
10503                doc: /* Alist of coding system names.
10504 Each element is one element list of coding system name.
10505 This variable is given to `completing-read' as COLLECTION argument.
10506
10507 Do not alter the value of this variable manually.  This variable should be
10508 updated by the functions `make-coding-system' and
10509 `define-coding-system-alias'.  */);
10510   Vcoding_system_alist = Qnil;
10511
10512   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
10513                doc: /* List of coding-categories (symbols) ordered by priority.
10514
10515 On detecting a coding system, Emacs tries code detection algorithms
10516 associated with each coding-category one by one in this order.  When
10517 one algorithm agrees with a byte sequence of source text, the coding
10518 system bound to the corresponding coding-category is selected.
10519
10520 Don't modify this variable directly, but use `set-coding-priority'.  */);
10521   {
10522     int i;
10523
10524     Vcoding_category_list = Qnil;
10525     for (i = coding_category_max - 1; i >= 0; i--)
10526       Vcoding_category_list
10527         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
10528                  Vcoding_category_list);
10529   }
10530
10531   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
10532                doc: /* Specify the coding system for read operations.
10533 It is useful to bind this variable with `let', but do not set it globally.
10534 If the value is a coding system, it is used for decoding on read operation.
10535 If not, an appropriate element is used from one of the coding system alists.
10536 There are three such tables: `file-coding-system-alist',
10537 `process-coding-system-alist', and `network-coding-system-alist'.  */);
10538   Vcoding_system_for_read = Qnil;
10539
10540   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
10541                doc: /* Specify the coding system for write operations.
10542 Programs bind this variable with `let', but you should not set it globally.
10543 If the value is a coding system, it is used for encoding of output,
10544 when writing it to a file and when sending it to a file or subprocess.
10545
10546 If this does not specify a coding system, an appropriate element
10547 is used from one of the coding system alists.
10548 There are three such tables: `file-coding-system-alist',
10549 `process-coding-system-alist', and `network-coding-system-alist'.
10550 For output to files, if the above procedure does not specify a coding system,
10551 the value of `buffer-file-coding-system' is used.  */);
10552   Vcoding_system_for_write = Qnil;
10553
10554   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
10555                doc: /*
10556 Coding system used in the latest file or process I/O.  */);
10557   Vlast_coding_system_used = Qnil;
10558
10559   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
10560                doc: /*
10561 Error status of the last code conversion.
10562
10563 When an error was detected in the last code conversion, this variable
10564 is set to one of the following symbols.
10565   `insufficient-source'
10566   `inconsistent-eol'
10567   `invalid-source'
10568   `interrupted'
10569   `insufficient-memory'
10570 When no error was detected, the value doesn't change.  So, to check
10571 the error status of a code conversion by this variable, you must
10572 explicitly set this variable to nil before performing code
10573 conversion.  */);
10574   Vlast_code_conversion_error = Qnil;
10575
10576   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
10577                doc: /*
10578 *Non-nil means always inhibit code conversion of end-of-line format.
10579 See info node `Coding Systems' and info node `Text and Binary' concerning
10580 such conversion.  */);
10581   inhibit_eol_conversion = 0;
10582
10583   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
10584                doc: /*
10585 Non-nil means process buffer inherits coding system of process output.
10586 Bind it to t if the process output is to be treated as if it were a file
10587 read from some filesystem.  */);
10588   inherit_process_coding_system = 0;
10589
10590   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
10591                doc: /*
10592 Alist to decide a coding system to use for a file I/O operation.
10593 The format is ((PATTERN . VAL) ...),
10594 where PATTERN is a regular expression matching a file name,
10595 VAL is a coding system, a cons of coding systems, or a function symbol.
10596 If VAL is a coding system, it is used for both decoding and encoding
10597 the file contents.
10598 If VAL is a cons of coding systems, the car part is used for decoding,
10599 and the cdr part is used for encoding.
10600 If VAL is a function symbol, the function must return a coding system
10601 or a cons of coding systems which are used as above.  The function is
10602 called with an argument that is a list of the arguments with which
10603 `find-operation-coding-system' was called.  If the function can't decide
10604 a coding system, it can return `undecided' so that the normal
10605 code-detection is performed.
10606
10607 See also the function `find-operation-coding-system'
10608 and the variable `auto-coding-alist'.  */);
10609   Vfile_coding_system_alist = Qnil;
10610
10611   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
10612                doc: /*
10613 Alist to decide a coding system to use for a process I/O operation.
10614 The format is ((PATTERN . VAL) ...),
10615 where PATTERN is a regular expression matching a program name,
10616 VAL is a coding system, a cons of coding systems, or a function symbol.
10617 If VAL is a coding system, it is used for both decoding what received
10618 from the program and encoding what sent to the program.
10619 If VAL is a cons of coding systems, the car part is used for decoding,
10620 and the cdr part is used for encoding.
10621 If VAL is a function symbol, the function must return a coding system
10622 or a cons of coding systems which are used as above.
10623
10624 See also the function `find-operation-coding-system'.  */);
10625   Vprocess_coding_system_alist = Qnil;
10626
10627   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
10628                doc: /*
10629 Alist to decide a coding system to use for a network I/O operation.
10630 The format is ((PATTERN . VAL) ...),
10631 where PATTERN is a regular expression matching a network service name
10632 or is a port number to connect to,
10633 VAL is a coding system, a cons of coding systems, or a function symbol.
10634 If VAL is a coding system, it is used for both decoding what received
10635 from the network stream and encoding what sent to the network stream.
10636 If VAL is a cons of coding systems, the car part is used for decoding,
10637 and the cdr part is used for encoding.
10638 If VAL is a function symbol, the function must return a coding system
10639 or a cons of coding systems which are used as above.
10640
10641 See also the function `find-operation-coding-system'.  */);
10642   Vnetwork_coding_system_alist = Qnil;
10643
10644   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
10645                doc: /* Coding system to use with system messages.
10646 Also used for decoding keyboard input on X Window system.  */);
10647   Vlocale_coding_system = Qnil;
10648
10649   /* The eol mnemonics are reset in startup.el system-dependently.  */
10650   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
10651                doc: /*
10652 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
10653   eol_mnemonic_unix = make_pure_c_string (":");
10654
10655   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
10656                doc: /*
10657 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
10658   eol_mnemonic_dos = make_pure_c_string ("\\");
10659
10660   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
10661                doc: /*
10662 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
10663   eol_mnemonic_mac = make_pure_c_string ("/");
10664
10665   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
10666                doc: /*
10667 *String displayed in mode line when end-of-line format is not yet determined.  */);
10668   eol_mnemonic_undecided = make_pure_c_string (":");
10669
10670   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
10671                doc: /*
10672 *Non-nil enables character translation while encoding and decoding.  */);
10673   Venable_character_translation = Qt;
10674
10675   DEFVAR_LISP ("standard-translation-table-for-decode",
10676                &Vstandard_translation_table_for_decode,
10677                doc: /* Table for translating characters while decoding.  */);
10678   Vstandard_translation_table_for_decode = Qnil;
10679
10680   DEFVAR_LISP ("standard-translation-table-for-encode",
10681                &Vstandard_translation_table_for_encode,
10682                doc: /* Table for translating characters while encoding.  */);
10683   Vstandard_translation_table_for_encode = Qnil;
10684
10685   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
10686                doc: /* Alist of charsets vs revision numbers.
10687 While encoding, if a charset (car part of an element) is found,
10688 designate it with the escape sequence identifying revision (cdr part
10689 of the element).  */);
10690   Vcharset_revision_table = Qnil;
10691
10692   DEFVAR_LISP ("default-process-coding-system",
10693                &Vdefault_process_coding_system,
10694                doc: /* Cons of coding systems used for process I/O by default.
10695 The car part is used for decoding a process output,
10696 the cdr part is used for encoding a text to be sent to a process.  */);
10697   Vdefault_process_coding_system = Qnil;
10698
10699   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
10700                doc: /*
10701 Table of extra Latin codes in the range 128..159 (inclusive).
10702 This is a vector of length 256.
10703 If Nth element is non-nil, the existence of code N in a file
10704 \(or output of subprocess) doesn't prevent it to be detected as
10705 a coding system of ISO 2022 variant which has a flag
10706 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
10707 or reading output of a subprocess.
10708 Only 128th through 159th elements have a meaning.  */);
10709   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
10710
10711   DEFVAR_LISP ("select-safe-coding-system-function",
10712                &Vselect_safe_coding_system_function,
10713                doc: /*
10714 Function to call to select safe coding system for encoding a text.
10715
10716 If set, this function is called to force a user to select a proper
10717 coding system which can encode the text in the case that a default
10718 coding system used in each operation can't encode the text.  The
10719 function should take care that the buffer is not modified while
10720 the coding system is being selected.
10721
10722 The default value is `select-safe-coding-system' (which see).  */);
10723   Vselect_safe_coding_system_function = Qnil;
10724
10725   DEFVAR_BOOL ("coding-system-require-warning",
10726                &coding_system_require_warning,
10727                doc: /* Internal use only.
10728 If non-nil, on writing a file, `select-safe-coding-system-function' is
10729 called even if `coding-system-for-write' is non-nil.  The command
10730 `universal-coding-system-argument' binds this variable to t temporarily.  */);
10731   coding_system_require_warning = 0;
10732
10733
10734   DEFVAR_BOOL ("inhibit-iso-escape-detection",
10735                &inhibit_iso_escape_detection,
10736                doc: /*
10737 If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
10738
10739 When Emacs reads text, it tries to detect how the text is encoded.
10740 This code detection is sensitive to escape sequences.  If Emacs sees
10741 a valid ISO-2022 escape sequence, it assumes the text is encoded in one
10742 of the ISO2022 encodings, and decodes text by the corresponding coding
10743 system (e.g. `iso-2022-7bit').
10744
10745 However, there may be a case that you want to read escape sequences in
10746 a file as is.  In such a case, you can set this variable to non-nil.
10747 Then the code detection will ignore any escape sequences, and no text is
10748 detected as encoded in some ISO-2022 encoding.  The result is that all
10749 escape sequences become visible in a buffer.
10750
10751 The default value is nil, and it is strongly recommended not to change
10752 it.  That is because many Emacs Lisp source files that contain
10753 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
10754 in Emacs's distribution, and they won't be decoded correctly on
10755 reading if you suppress escape sequence detection.
10756
10757 The other way to read escape sequences in a file without decoding is
10758 to explicitly specify some coding system that doesn't use ISO-2022
10759 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
10760   inhibit_iso_escape_detection = 0;
10761
10762   DEFVAR_BOOL ("inhibit-null-byte-detection",
10763                &inhibit_null_byte_detection,
10764                doc: /* If non-nil, Emacs ignores null bytes on code detection.
10765 By default, Emacs treats it as binary data, and does not attempt to
10766 decode it.  The effect is as if you specified `no-conversion' for
10767 reading that text.
10768
10769 Set this to non-nil when a regular text happens to include null bytes.
10770 Examples are Index nodes of Info files and null-byte delimited output
10771 from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
10772 decode text as usual.  */);
10773   inhibit_null_byte_detection = 0;
10774
10775   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
10776                doc: /* Char table for translating self-inserting characters.
10777 This is applied to the result of input methods, not their input.
10778 See also `keyboard-translate-table'.
10779
10780 Use of this variable for character code unification was rendered
10781 obsolete in Emacs 23.1 and later, since Unicode is now the basis of
10782 internal character representation.  */);
10783     Vtranslation_table_for_input = Qnil;
10784
10785   {
10786     Lisp_Object args[coding_arg_max];
10787     Lisp_Object plist[16];
10788     int i;
10789
10790     for (i = 0; i < coding_arg_max; i++)
10791       args[i] = Qnil;
10792
10793     plist[0] = intern_c_string (":name");
10794     plist[1] = args[coding_arg_name] = Qno_conversion;
10795     plist[2] = intern_c_string (":mnemonic");
10796     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10797     plist[4] = intern_c_string (":coding-type");
10798     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10799     plist[6] = intern_c_string (":ascii-compatible-p");
10800     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10801     plist[8] = intern_c_string (":default-char");
10802     plist[9] = args[coding_arg_default_char] = make_number (0);
10803     plist[10] = intern_c_string (":for-unibyte");
10804     plist[11] = args[coding_arg_for_unibyte] = Qt;
10805     plist[12] = intern_c_string (":docstring");
10806     plist[13] = make_pure_c_string ("Do no conversion.\n\
10807 \n\
10808 When you visit a file with this coding, the file is read into a\n\
10809 unibyte buffer as is, thus each byte of a file is treated as a\n\
10810 character.");
10811     plist[14] = intern_c_string (":eol-type");
10812     plist[15] = args[coding_arg_eol_type] = Qunix;
10813     args[coding_arg_plist] = Flist (16, plist);
10814     Fdefine_coding_system_internal (coding_arg_max, args);
10815
10816     plist[1] = args[coding_arg_name] = Qundecided;
10817     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10818     plist[5] = args[coding_arg_coding_type] = Qundecided;
10819     /* This is already set.
10820        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10821     plist[8] = intern_c_string (":charset-list");
10822     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10823     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10824     plist[13] = make_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
10825     plist[15] = args[coding_arg_eol_type] = Qnil;
10826     args[coding_arg_plist] = Flist (16, plist);
10827     Fdefine_coding_system_internal (coding_arg_max, args);
10828   }
10829
10830   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10831
10832   {
10833     int i;
10834
10835     for (i = 0; i < coding_category_max; i++)
10836       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10837   }
10838 #if defined (DOS_NT)
10839   system_eol_type = Qdos;
10840 #else
10841   system_eol_type = Qunix;
10842 #endif
10843   staticpro (&system_eol_type);
10844 }
10845
10846 char *
10847 emacs_strerror (int error_number)
10848 {
10849   char *str;
10850
10851   synchronize_system_messages_locale ();
10852   str = strerror (error_number);
10853
10854   if (! NILP (Vlocale_coding_system))
10855     {
10856       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10857                                                       Vlocale_coding_system,
10858                                                       0);
10859       str = (char *) SDATA (dec);
10860     }
10861
10862   return str;
10863 }
10864
10865 #endif /* emacs */
10866
10867 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10868    (do not change this comment) */