src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software; you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation; either version 3, or (at your option)
  17 any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs; see the file COPYING.  If not, write to
  26 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  27 Boston, MA 02110-1301, USA.  */
  28
  29 /*** TABLE OF CONTENTS ***
  30
  31   0. General comments
  32   1. Preamble
  33   2. Emacs' internal format (emacs-utf-8) handlers
  34   3. UTF-8 handlers
  35   4. UTF-16 handlers
  36   5. Charset-base coding systems handlers
  37   6. emacs-mule (old Emacs' internal format) handlers
  38   7. ISO2022 handlers
  39   8. Shift-JIS and BIG5 handlers
  40   9. CCL handlers
  41   10. C library functions
  42   11. Emacs Lisp library functions
  43   12. Postamble
  44
  45 */
  46
  47 /*** 0. General comments ***
  48
  49
  50 CODING SYSTEM
  51
  52   A coding system is an object for an encoding mechanism that contains
  53   information about how to convert byte sequences to character
  54   sequences and vice versa.  When we say "decode", it means converting
  55   a byte sequence of a specific coding system into a character
  56   sequence that is represented by Emacs' internal coding system
  57   `emacs-utf-8', and when we say "encode", it means converting a
  58   character sequence of emacs-utf-8 to a byte sequence of a specific
  59   coding system.
  60
  61   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  62   C level, a coding system is represented by a vector of attributes
  63   stored in the hash table Vcharset_hash_table.  The conversion from
  64   coding system symbol to attributes vector is done by looking up
  65   Vcharset_hash_table by the symbol.
  66
  67   Coding systems are classified into the following types depending on
  68   the encoding mechanism.  Here's a brief description of the types.
  69
  70   o UTF-8
  71
  72   o UTF-16
  73
  74   o Charset-base coding system
  75
  76   A coding system defined by one or more (coded) character sets.
  77   Decoding and encoding are done by a code converter defined for each
  78   character set.
  79
  80   o Old Emacs internal format (emacs-mule)
  81
  82   The coding system adopted by old versions of Emacs (20 and 21).
  83
  84   o ISO2022-base coding system
  85
  86   The most famous coding system for multiple character sets.  X's
  87   Compound Text, various EUCs (Extended Unix Code), and coding systems
  88   used in the Internet communication such as ISO-2022-JP are all
  89   variants of ISO2022.
  90
  91   o SJIS (or Shift-JIS or MS-Kanji-Code)
  92
  93   A coding system to encode character sets: ASCII, JISX0201, and
  94   JISX0208.  Widely used for PC's in Japan.  Details are described in
  95   section 8.
  96
  97   o BIG5
  98
  99   A coding system to encode character sets: ASCII and Big5.  Widely
 100   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
 101   described in section 8.  In this file, when we write "big5" (all
 102   lowercase), we mean the coding system, and when we write "Big5"
 103   (capitalized), we mean the character set.
 104
 105   o CCL
 106
 107   If a user wants to decode/encode text encoded in a coding system
 108   not listed above, he can supply a decoder and an encoder for it in
 109   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 110   program while decoding/encoding.
 111
 112   o Raw-text
 113
 114   A coding system for text containing raw eight-bit data.  Emacs
 115   treats each byte of source text as a character (except for
 116   end-of-line conversion).
 117
 118   o No-conversion
 119
 120   Like raw text, but don't do end-of-line conversion.
 121
 122
 123 END-OF-LINE FORMAT
 124
 125   How text end-of-line is encoded depends on operating system.  For
 126   instance, Unix's format is just one byte of LF (line-feed) code,
 127   whereas DOS's format is two-byte sequence of `carriage-return' and
 128   `line-feed' codes.  MacOS's format is usually one byte of
 129   `carriage-return'.
 130
 131   Since text character encoding and end-of-line encoding are
 132   independent, any coding system described above can take any format
 133   of end-of-line (except for no-conversion).
 134
 135 STRUCT CODING_SYSTEM
 136
 137   Before using a coding system for code conversion (i.e. decoding and
 138   encoding), we setup a structure of type `struct coding_system'.
 139   This structure keeps various information about a specific code
 140   conversion (e.g. the location of source and destination data).
 141
 142 */
 143
 144 /* COMMON MACROS */
 145
 146
 147 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 148
 149   These functions check if a byte sequence specified as a source in
 150   CODING conforms to the format of XXX, and update the members of
 151   DETECT_INFO.
 152
 153   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 154
 155   Below is the template of these functions.  */
 156
 157 #if 0
 158 static int
 159 detect_coding_XXX (coding, detect_info)
 160      struct coding_system *coding;
 161      struct coding_detection_info *detect_info;
 162 {
 163   const unsigned char *src = coding->source;
 164   const unsigned char *src_end = coding->source + coding->src_bytes;
 165   int multibytep = coding->src_multibyte;
 166   int consumed_chars = 0;
 167   int found = 0;
 168   ...;
 169
 170   while (1)
 171     {
 172       /* Get one byte from the source.  If the souce is exausted, jump
 173          to no_more_source:.  */
 174       ONE_MORE_BYTE (c);
 175
 176       if (! __C_conforms_to_XXX___ (c))
 177         break;
 178       if (! __C_strongly_suggests_XXX__ (c))
 179         found = CATEGORY_MASK_XXX;
 180     }
 181   /* The byte sequence is invalid for XXX.  */
 182   detect_info->rejected |= CATEGORY_MASK_XXX;
 183   return 0;
 184
 185  no_more_source:
 186   /* The source exausted successfully.  */
 187   detect_info->found |= found;
 188   return 1;
 189 }
 190 #endif
 191
 192 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 193
 194   These functions decode a byte sequence specified as a source by
 195   CODING.  The resulting multibyte text goes to a place pointed to by
 196   CODING->charbuf, the length of which should not exceed
 197   CODING->charbuf_size;
 198
 199   These functions set the information of original and decoded texts in
 200   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 201   They also set CODING->result to one of CODING_RESULT_XXX indicating
 202   how the decoding is finished.
 203
 204   Below is the template of these functions.  */
 205
 206 #if 0
 207 static void
 208 decode_coding_XXXX (coding)
 209      struct coding_system *coding;
 210 {
 211   const unsigned char *src = coding->source + coding->consumed;
 212   const unsigned char *src_end = coding->source + coding->src_bytes;
 213   /* SRC_BASE remembers the start position in source in each loop.
 214      The loop will be exited when there's not enough source code, or
 215      when there's no room in CHARBUF for a decoded character.  */
 216   const unsigned char *src_base;
 217   /* A buffer to produce decoded characters.  */
 218   int *charbuf = coding->charbuf + coding->charbuf_used;
 219   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 220   int multibytep = coding->src_multibyte;
 221
 222   while (1)
 223     {
 224       src_base = src;
 225       if (charbuf < charbuf_end)
 226         /* No more room to produce a decoded character.  */
 227         break;
 228       ONE_MORE_BYTE (c);
 229       /* Decode it. */
 230     }
 231
 232  no_more_source:
 233   if (src_base < src_end
 234       && coding->mode & CODING_MODE_LAST_BLOCK)
 235     /* If the source ends by partial bytes to construct a character,
 236        treat them as eight-bit raw data.  */
 237     while (src_base < src_end && charbuf < charbuf_end)
 238       *charbuf++ = *src_base++;
 239   /* Remember how many bytes and characters we consumed.  If the
 240      source is multibyte, the bytes and chars are not identical.  */
 241   coding->consumed = coding->consumed_char = src_base - coding->source;
 242   /* Remember how many characters we produced.  */
 243   coding->charbuf_used = charbuf - coding->charbuf;
 244 }
 245 #endif
 246
 247 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 248
 249   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 250   internal multibyte format by CODING.  The resulting byte sequence
 251   goes to a place pointed to by DESTINATION, the length of which
 252   should not exceed DST_BYTES.
 253
 254   These functions set the information of original and encoded texts in
 255   the members produced, produced_char, consumed, and consumed_char of
 256   the structure *CODING.  They also set the member result to one of
 257   CODING_RESULT_XXX indicating how the encoding finished.
 258
 259   DST_BYTES zero means that source area and destination area are
 260   overlapped, which means that we can produce a encoded text until it
 261   reaches at the head of not-yet-encoded source text.
 262
 263   Below is a template of these functions.  */
 264 #if 0
 265 static void
 266 encode_coding_XXX (coding)
 267      struct coding_system *coding;
 268 {
 269   int multibytep = coding->dst_multibyte;
 270   int *charbuf = coding->charbuf;
 271   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 272   unsigned char *dst = coding->destination + coding->produced;
 273   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 274   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 275   int produced_chars = 0;
 276
 277   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 278     {
 279       int c = *charbuf;
 280       /* Encode C into DST, and increment DST.  */
 281     }
 282  label_no_more_destination:
 283   /* How many chars and bytes we produced.  */
 284   coding->produced_char += produced_chars;
 285   coding->produced = dst - coding->destination;
 286 }
 287 #endif
 288
 289 \f
 290 /*** 1. Preamble ***/
 291
 292 #include <config.h>
 293 #include <stdio.h>
 294
 295 #include "lisp.h"
 296 #include "buffer.h"
 297 #include "character.h"
 298 #include "charset.h"
 299 #include "ccl.h"
 300 #include "composite.h"
 301 #include "coding.h"
 302 #include "window.h"
 303 #include "frame.h"
 304 #include "termhooks.h"
 305
 306 Lisp_Object Vcoding_system_hash_table;
 307
 308 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 309 Lisp_Object Qunix, Qdos;
 310 extern Lisp_Object Qmac;        /* frame.c */
 311 Lisp_Object Qbuffer_file_coding_system;
 312 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 313 Lisp_Object Qdefault_char;
 314 Lisp_Object Qno_conversion, Qundecided;
 315 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 316 Lisp_Object Qbig, Qlittle;
 317 Lisp_Object Qcoding_system_history;
 318 Lisp_Object Qvalid_codes;
 319 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
 320 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 321 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 322 Lisp_Object QCascii_compatible_p;
 323
 324 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 325 Lisp_Object Qcall_process, Qcall_process_region;
 326 Lisp_Object Qstart_process, Qopen_network_stream;
 327 Lisp_Object Qtarget_idx;
 328
 329 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 330 Lisp_Object Qinterrupted, Qinsufficient_memory;
 331
 332 extern Lisp_Object Qcompletion_ignore_case;
 333
 334 /* If a symbol has this property, evaluate the value to define the
 335    symbol as a coding system.  */
 336 static Lisp_Object Qcoding_system_define_form;
 337
 338 int coding_system_require_warning;
 339
 340 Lisp_Object Vselect_safe_coding_system_function;
 341
 342 /* Mnemonic string for each format of end-of-line.  */
 343 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 344 /* Mnemonic string to indicate format of end-of-line is not yet
 345    decided.  */
 346 Lisp_Object eol_mnemonic_undecided;
 347
 348 /* Format of end-of-line decided by system.  This is Qunix on
 349    Unix and Mac, Qdos on DOS/Windows.
 350    This has an effect only for external encoding (i.e. for output to
 351    file and process), not for in-buffer or Lisp string encoding.  */
 352 static Lisp_Object system_eol_type;
 353
 354 #ifdef emacs
 355
 356 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 357
 358 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 359
 360 /* Coding system emacs-mule and raw-text are for converting only
 361    end-of-line format.  */
 362 Lisp_Object Qemacs_mule, Qraw_text;
 363 Lisp_Object Qutf_8_emacs;
 364
 365 /* Coding-systems are handed between Emacs Lisp programs and C internal
 366    routines by the following three variables.  */
 367 /* Coding-system for reading files and receiving data from process.  */
 368 Lisp_Object Vcoding_system_for_read;
 369 /* Coding-system for writing files and sending data to process.  */
 370 Lisp_Object Vcoding_system_for_write;
 371 /* Coding-system actually used in the latest I/O.  */
 372 Lisp_Object Vlast_coding_system_used;
 373 /* Set to non-nil when an error is detected while code conversion.  */
 374 Lisp_Object Vlast_code_conversion_error;
 375 /* A vector of length 256 which contains information about special
 376    Latin codes (especially for dealing with Microsoft codes).  */
 377 Lisp_Object Vlatin_extra_code_table;
 378
 379 /* Flag to inhibit code conversion of end-of-line format.  */
 380 int inhibit_eol_conversion;
 381
 382 /* Flag to inhibit ISO2022 escape sequence detection.  */
 383 int inhibit_iso_escape_detection;
 384
 385 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 386 int inherit_process_coding_system;
 387
 388 /* Coding system to be used to encode text for terminal display when
 389    terminal coding system is nil.  */
 390 struct coding_system safe_terminal_coding;
 391
 392 Lisp_Object Vfile_coding_system_alist;
 393 Lisp_Object Vprocess_coding_system_alist;
 394 Lisp_Object Vnetwork_coding_system_alist;
 395
 396 Lisp_Object Vlocale_coding_system;
 397
 398 #endif /* emacs */
 399
 400 /* Flag to tell if we look up translation table on character code
 401    conversion.  */
 402 Lisp_Object Venable_character_translation;
 403 /* Standard translation table to look up on decoding (reading).  */
 404 Lisp_Object Vstandard_translation_table_for_decode;
 405 /* Standard translation table to look up on encoding (writing).  */
 406 Lisp_Object Vstandard_translation_table_for_encode;
 407
 408 Lisp_Object Qtranslation_table;
 409 Lisp_Object Qtranslation_table_id;
 410 Lisp_Object Qtranslation_table_for_decode;
 411 Lisp_Object Qtranslation_table_for_encode;
 412
 413 /* Alist of charsets vs revision number.  */
 414 static Lisp_Object Vcharset_revision_table;
 415
 416 /* Default coding systems used for process I/O.  */
 417 Lisp_Object Vdefault_process_coding_system;
 418
 419 /* Char table for translating Quail and self-inserting input.  */
 420 Lisp_Object Vtranslation_table_for_input;
 421
 422 /* Two special coding systems.  */
 423 Lisp_Object Vsjis_coding_system;
 424 Lisp_Object Vbig5_coding_system;
 425
 426 /* ISO2022 section */
 427
 428 #define CODING_ISO_INITIAL(coding, reg)                 \
 429   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 430                      coding_attr_iso_initial),          \
 431                reg)))
 432
 433
 434 #define CODING_ISO_REQUEST(coding, charset_id)  \
 435   ((charset_id <= (coding)->max_charset_id      \
 436     ? (coding)->safe_charsets[charset_id]       \
 437     : -1))
 438
 439
 440 #define CODING_ISO_FLAGS(coding)        \
 441   ((coding)->spec.iso_2022.flags)
 442 #define CODING_ISO_DESIGNATION(coding, reg)     \
 443   ((coding)->spec.iso_2022.current_designation[reg])
 444 #define CODING_ISO_INVOCATION(coding, plane)    \
 445   ((coding)->spec.iso_2022.current_invocation[plane])
 446 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 447   ((coding)->spec.iso_2022.single_shifting)
 448 #define CODING_ISO_BOL(coding)  \
 449   ((coding)->spec.iso_2022.bol)
 450 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 451   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 452
 453 /* Control characters of ISO2022.  */
 454                         /* code */      /* function */
 455 #define ISO_CODE_LF     0x0A            /* line-feed */
 456 #define ISO_CODE_CR     0x0D            /* carriage-return */
 457 #define ISO_CODE_SO     0x0E            /* shift-out */
 458 #define ISO_CODE_SI     0x0F            /* shift-in */
 459 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 460 #define ISO_CODE_ESC    0x1B            /* escape */
 461 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 462 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 463 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 464
 465 /* All code (1-byte) of ISO2022 is classified into one of the
 466    followings.  */
 467 enum iso_code_class_type
 468   {
 469     ISO_control_0,              /* Control codes in the range
 470                                    0x00..0x1F and 0x7F, except for the
 471                                    following 5 codes.  */
 472     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 473     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 474     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 475     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 476     ISO_control_1,              /* Control codes in the range
 477                                    0x80..0x9F, except for the
 478                                    following 3 codes.  */
 479     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 480     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 481     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 482     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 483     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 484     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 485     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 486   };
 487
 488 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 489     `iso-flags' attribute of an iso2022 coding system.  */
 490
 491 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 492    instead of the correct short-form sequence (e.g. ESC $ A).  */
 493 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 494
 495 /* If set, reset graphic planes and registers at end-of-line to the
 496    initial state.  */
 497 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 498
 499 /* If set, reset graphic planes and registers before any control
 500    characters to the initial state.  */
 501 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 502
 503 /* If set, encode by 7-bit environment.  */
 504 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 505
 506 /* If set, use locking-shift function.  */
 507 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 508
 509 /* If set, use single-shift function.  Overwrite
 510    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 511 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 512
 513 /* If set, use designation escape sequence.  */
 514 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 515
 516 /* If set, produce revision number sequence.  */
 517 #define CODING_ISO_FLAG_REVISION        0x0080
 518
 519 /* If set, produce ISO6429's direction specifying sequence.  */
 520 #define CODING_ISO_FLAG_DIRECTION       0x0100
 521
 522 /* If set, assume designation states are reset at beginning of line on
 523    output.  */
 524 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 525
 526 /* If set, designation sequence should be placed at beginning of line
 527    on output.  */
 528 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 529
 530 /* If set, do not encode unsafe charactes on output.  */
 531 #define CODING_ISO_FLAG_SAFE            0x0800
 532
 533 /* If set, extra latin codes (128..159) are accepted as a valid code
 534    on input.  */
 535 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 536
 537 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 538
 539 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 540
 541 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 542
 543 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 544
 545 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 546
 547 /* A character to be produced on output if encoding of the original
 548    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 549 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 550
 551
 552 /* UTF-16 section */
 553 #define CODING_UTF_16_BOM(coding)       \
 554   ((coding)->spec.utf_16.bom)
 555
 556 #define CODING_UTF_16_ENDIAN(coding)    \
 557   ((coding)->spec.utf_16.endian)
 558
 559 #define CODING_UTF_16_SURROGATE(coding) \
 560   ((coding)->spec.utf_16.surrogate)
 561
 562
 563 /* CCL section */
 564 #define CODING_CCL_DECODER(coding)      \
 565   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 566 #define CODING_CCL_ENCODER(coding)      \
 567   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 568 #define CODING_CCL_VALIDS(coding)                                          \
 569   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 570
 571 /* Index for each coding category in `coding_categories' */
 572
 573 enum coding_category
 574   {
 575     coding_category_iso_7,
 576     coding_category_iso_7_tight,
 577     coding_category_iso_8_1,
 578     coding_category_iso_8_2,
 579     coding_category_iso_7_else,
 580     coding_category_iso_8_else,
 581     coding_category_utf_8,
 582     coding_category_utf_16_auto,
 583     coding_category_utf_16_be,
 584     coding_category_utf_16_le,
 585     coding_category_utf_16_be_nosig,
 586     coding_category_utf_16_le_nosig,
 587     coding_category_charset,
 588     coding_category_sjis,
 589     coding_category_big5,
 590     coding_category_ccl,
 591     coding_category_emacs_mule,
 592     /* All above are targets of code detection.  */
 593     coding_category_raw_text,
 594     coding_category_undecided,
 595     coding_category_max
 596   };
 597
 598 /* Definitions of flag bits used in detect_coding_XXXX.  */
 599 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 600 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 601 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 602 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 603 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 604 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 605 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 606 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 607 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 608 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 609 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 610 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 611 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 612 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 613 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 614 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 615 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 616 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 617
 618 /* This value is returned if detect_coding_mask () find nothing other
 619    than ASCII characters.  */
 620 #define CATEGORY_MASK_ANY               \
 621   (CATEGORY_MASK_ISO_7                  \
 622    | CATEGORY_MASK_ISO_7_TIGHT          \
 623    | CATEGORY_MASK_ISO_8_1              \
 624    | CATEGORY_MASK_ISO_8_2              \
 625    | CATEGORY_MASK_ISO_7_ELSE           \
 626    | CATEGORY_MASK_ISO_8_ELSE           \
 627    | CATEGORY_MASK_UTF_8                \
 628    | CATEGORY_MASK_UTF_16_BE            \
 629    | CATEGORY_MASK_UTF_16_LE            \
 630    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 631    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 632    | CATEGORY_MASK_CHARSET              \
 633    | CATEGORY_MASK_SJIS                 \
 634    | CATEGORY_MASK_BIG5                 \
 635    | CATEGORY_MASK_CCL                  \
 636    | CATEGORY_MASK_EMACS_MULE)
 637
 638
 639 #define CATEGORY_MASK_ISO_7BIT \
 640   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 641
 642 #define CATEGORY_MASK_ISO_8BIT \
 643   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 644
 645 #define CATEGORY_MASK_ISO_ELSE \
 646   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 647
 648 #define CATEGORY_MASK_ISO_ESCAPE        \
 649   (CATEGORY_MASK_ISO_7                  \
 650    | CATEGORY_MASK_ISO_7_TIGHT          \
 651    | CATEGORY_MASK_ISO_7_ELSE           \
 652    | CATEGORY_MASK_ISO_8_ELSE)
 653
 654 #define CATEGORY_MASK_ISO       \
 655   (  CATEGORY_MASK_ISO_7BIT     \
 656      | CATEGORY_MASK_ISO_8BIT   \
 657      | CATEGORY_MASK_ISO_ELSE)
 658
 659 #define CATEGORY_MASK_UTF_16            \
 660   (CATEGORY_MASK_UTF_16_BE              \
 661    | CATEGORY_MASK_UTF_16_LE            \
 662    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 663    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 664
 665
 666 /* List of symbols `coding-category-xxx' ordered by priority.  This
 667    variable is exposed to Emacs Lisp.  */
 668 static Lisp_Object Vcoding_category_list;
 669
 670 /* Table of coding categories (Lisp symbols).  This variable is for
 671    internal use oly.  */
 672 static Lisp_Object Vcoding_category_table;
 673
 674 /* Table of coding-categories ordered by priority.  */
 675 static enum coding_category coding_priorities[coding_category_max];
 676
 677 /* Nth element is a coding context for the coding system bound to the
 678    Nth coding category.  */
 679 static struct coding_system coding_categories[coding_category_max];
 680
 681 /*** Commonly used macros and functions ***/
 682
 683 #ifndef min
 684 #define min(a, b) ((a) < (b) ? (a) : (b))
 685 #endif
 686 #ifndef max
 687 #define max(a, b) ((a) > (b) ? (a) : (b))
 688 #endif
 689
 690 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 691   do {                                                  \
 692     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 693     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 694   } while (0)
 695
 696
 697 /* Safely get one byte from the source text pointed by SRC which ends
 698    at SRC_END, and set C to that byte.  If there are not enough bytes
 699    in the source, it jumps to `no_more_source'.  If multibytep is
 700    nonzero, and a multibyte character is found at SRC, set C to the
 701    negative value of the character code.  The caller should declare
 702    and set these variables appropriately in advance:
 703         src, src_end, multibytep */
 704
 705 #define ONE_MORE_BYTE(c)                                \
 706   do {                                                  \
 707     if (src == src_end)                                 \
 708       {                                                 \
 709         if (src_base < src)                             \
 710           record_conversion_result                      \
 711             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 712         goto no_more_source;                            \
 713       }                                                 \
 714     c = *src++;                                         \
 715     if (multibytep && (c & 0x80))                       \
 716       {                                                 \
 717         if ((c & 0xFE) == 0xC0)                         \
 718           c = ((c & 1) << 6) | *src++;                  \
 719         else                                            \
 720           {                                             \
 721             src--;                                      \
 722             c = - string_char (src, &src, NULL);        \
 723             record_conversion_result                    \
 724               (coding, CODING_RESULT_INVALID_SRC);      \
 725           }                                             \
 726       }                                                 \
 727     consumed_chars++;                                   \
 728   } while (0)
 729
 730
 731 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 732   do {                                                  \
 733     c = *src++;                                         \
 734     if (multibytep && (c & 0x80))                       \
 735       {                                                 \
 736         if ((c & 0xFE) == 0xC0)                         \
 737           c = ((c & 1) << 6) | *src++;                  \
 738         else                                            \
 739           {                                             \
 740             src--;                                      \
 741             c = - string_char (src, &src, NULL);        \
 742             record_conversion_result                    \
 743               (coding, CODING_RESULT_INVALID_SRC);      \
 744           }                                             \
 745       }                                                 \
 746     consumed_chars++;                                   \
 747   } while (0)
 748
 749
 750 /* Store a byte C in the place pointed by DST and increment DST to the
 751    next free point, and increment PRODUCED_CHARS.  The caller should
 752    assure that C is 0..127, and declare and set the variable `dst'
 753    appropriately in advance.
 754 */
 755
 756
 757 #define EMIT_ONE_ASCII_BYTE(c)  \
 758   do {                          \
 759     produced_chars++;           \
 760     *dst++ = (c);               \
 761   } while (0)
 762
 763
 764 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 765
 766 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 767   do {                                  \
 768     produced_chars += 2;                \
 769     *dst++ = (c1), *dst++ = (c2);       \
 770   } while (0)
 771
 772
 773 /* Store a byte C in the place pointed by DST and increment DST to the
 774    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 775    nonzero, store in an appropriate multibyte from.  The caller should
 776    declare and set the variables `dst' and `multibytep' appropriately
 777    in advance.  */
 778
 779 #define EMIT_ONE_BYTE(c)                \
 780   do {                                  \
 781     produced_chars++;                   \
 782     if (multibytep)                     \
 783       {                                 \
 784         int ch = (c);                   \
 785         if (ch >= 0x80)                 \
 786           ch = BYTE8_TO_CHAR (ch);      \
 787         CHAR_STRING_ADVANCE (ch, dst);  \
 788       }                                 \
 789     else                                \
 790       *dst++ = (c);                     \
 791   } while (0)
 792
 793
 794 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 795
 796 #define EMIT_TWO_BYTES(c1, c2)          \
 797   do {                                  \
 798     produced_chars += 2;                \
 799     if (multibytep)                     \
 800       {                                 \
 801         int ch;                         \
 802                                         \
 803         ch = (c1);                      \
 804         if (ch >= 0x80)                 \
 805           ch = BYTE8_TO_CHAR (ch);      \
 806         CHAR_STRING_ADVANCE (ch, dst);  \
 807         ch = (c2);                      \
 808         if (ch >= 0x80)                 \
 809           ch = BYTE8_TO_CHAR (ch);      \
 810         CHAR_STRING_ADVANCE (ch, dst);  \
 811       }                                 \
 812     else                                \
 813       {                                 \
 814         *dst++ = (c1);                  \
 815         *dst++ = (c2);                  \
 816       }                                 \
 817   } while (0)
 818
 819
 820 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 821   do {                                  \
 822     EMIT_ONE_BYTE (c1);                 \
 823     EMIT_TWO_BYTES (c2, c3);            \
 824   } while (0)
 825
 826
 827 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 828   do {                                          \
 829     EMIT_TWO_BYTES (c1, c2);                    \
 830     EMIT_TWO_BYTES (c3, c4);                    \
 831   } while (0)
 832
 833
 834 /* Prototypes for static functions.  */
 835 static void record_conversion_result P_ ((struct coding_system *coding,
 836                                           enum coding_result_code result));
 837 static int detect_coding_utf_8 P_ ((struct coding_system *,
 838                                     struct coding_detection_info *info));
 839 static void decode_coding_utf_8 P_ ((struct coding_system *));
 840 static int encode_coding_utf_8 P_ ((struct coding_system *));
 841
 842 static int detect_coding_utf_16 P_ ((struct coding_system *,
 843                                      struct coding_detection_info *info));
 844 static void decode_coding_utf_16 P_ ((struct coding_system *));
 845 static int encode_coding_utf_16 P_ ((struct coding_system *));
 846
 847 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 848                                        struct coding_detection_info *info));
 849 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 850 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 851
 852 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 853                                          struct coding_detection_info *info));
 854 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 855 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 856
 857 static int detect_coding_sjis P_ ((struct coding_system *,
 858                                    struct coding_detection_info *info));
 859 static void decode_coding_sjis P_ ((struct coding_system *));
 860 static int encode_coding_sjis P_ ((struct coding_system *));
 861
 862 static int detect_coding_big5 P_ ((struct coding_system *,
 863                                    struct coding_detection_info *info));
 864 static void decode_coding_big5 P_ ((struct coding_system *));
 865 static int encode_coding_big5 P_ ((struct coding_system *));
 866
 867 static int detect_coding_ccl P_ ((struct coding_system *,
 868                                   struct coding_detection_info *info));
 869 static void decode_coding_ccl P_ ((struct coding_system *));
 870 static int encode_coding_ccl P_ ((struct coding_system *));
 871
 872 static void decode_coding_raw_text P_ ((struct coding_system *));
 873 static int encode_coding_raw_text P_ ((struct coding_system *));
 874
 875 static void coding_set_source P_ ((struct coding_system *));
 876 static void coding_set_destination P_ ((struct coding_system *));
 877 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 878 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 879                                             EMACS_INT, EMACS_INT));
 880 static unsigned char *alloc_destination P_ ((struct coding_system *,
 881                                              EMACS_INT, unsigned char *));
 882 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 883 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 884                                                      int *, int *,
 885                                                      unsigned char *));
 886 static int detect_eol P_ ((const unsigned char *,
 887                            EMACS_INT, enum coding_category));
 888 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 889 static void decode_eol P_ ((struct coding_system *));
 890 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 891 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
 892                                         int, int *, int *));
 893 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 894 static INLINE void produce_composition P_ ((struct coding_system *, int *,
 895                                             EMACS_INT));
 896 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 897                                         EMACS_INT));
 898 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 899 static int decode_coding P_ ((struct coding_system *));
 900 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 901                                                       struct coding_system *,
 902                                                       int *, EMACS_INT *));
 903 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 904                                                   struct coding_system *,
 905                                                   int *, EMACS_INT *));
 906 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 907 static int encode_coding P_ ((struct coding_system *));
 908 static Lisp_Object make_conversion_work_buffer P_ ((int));
 909 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 910 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 911 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 912
 913 static void
 914 record_conversion_result (struct coding_system *coding,
 915                           enum coding_result_code result)
 916 {
 917   coding->result = result;
 918   switch (result)
 919     {
 920     case CODING_RESULT_INSUFFICIENT_SRC:
 921       Vlast_code_conversion_error = Qinsufficient_source;
 922       break;
 923     case CODING_RESULT_INCONSISTENT_EOL:
 924       Vlast_code_conversion_error = Qinconsistent_eol;
 925       break;
 926     case CODING_RESULT_INVALID_SRC:
 927       Vlast_code_conversion_error = Qinvalid_source;
 928       break;
 929     case CODING_RESULT_INTERRUPT:
 930       Vlast_code_conversion_error = Qinterrupted;
 931       break;
 932     case CODING_RESULT_INSUFFICIENT_MEM:
 933       Vlast_code_conversion_error = Qinsufficient_memory;
 934       break;
 935     default:
 936       Vlast_code_conversion_error = intern ("Unknown error");
 937     }
 938 }
 939
 940 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 941   do {                                                                       \
 942     charset_map_loaded = 0;                                                  \
 943     c = DECODE_CHAR (charset, code);                                         \
 944     if (charset_map_loaded)                                                  \
 945       {                                                                      \
 946         const unsigned char *orig = coding->source;                          \
 947         EMACS_INT offset;                                                    \
 948                                                                              \
 949         coding_set_source (coding);                                          \
 950         offset = coding->source - orig;                                      \
 951         src += offset;                                                       \
 952         src_base += offset;                                                  \
 953         src_end += offset;                                                   \
 954       }                                                                      \
 955   } while (0)
 956
 957
 958 /* If there are at least BYTES length of room at dst, allocate memory
 959    for coding->destination and update dst and dst_end.  We don't have
 960    to take care of coding->source which will be relocated.  It is
 961    handled by calling coding_set_source in encode_coding.  */
 962
 963 #define ASSURE_DESTINATION(bytes)                               \
 964   do {                                                          \
 965     if (dst + (bytes) >= dst_end)                               \
 966       {                                                         \
 967         int more_bytes = charbuf_end - charbuf + (bytes);       \
 968                                                                 \
 969         dst = alloc_destination (coding, more_bytes, dst);      \
 970         dst_end = coding->destination + coding->dst_bytes;      \
 971       }                                                         \
 972   } while (0)
 973
 974
 975 /* Store multibyte form of the character C in P, and advance P to the
 976    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
 977    never calls MAYBE_UNIFY_CHAR.  */
 978
 979 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
 980   do {                                          \
 981     if ((c) <= MAX_1_BYTE_CHAR)                 \
 982       *(p)++ = (c);                             \
 983     else if ((c) <= MAX_2_BYTE_CHAR)            \
 984       *(p)++ = (0xC0 | ((c) >> 6)),             \
 985         *(p)++ = (0x80 | ((c) & 0x3F));         \
 986     else if ((c) <= MAX_3_BYTE_CHAR)            \
 987       *(p)++ = (0xE0 | ((c) >> 12)),            \
 988         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
 989         *(p)++ = (0x80 | ((c) & 0x3F));         \
 990     else if ((c) <= MAX_4_BYTE_CHAR)            \
 991       *(p)++ = (0xF0 | (c >> 18)),              \
 992         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
 993         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
 994         *(p)++ = (0x80 | (c & 0x3F));           \
 995     else if ((c) <= MAX_5_BYTE_CHAR)            \
 996       *(p)++ = 0xF8,                            \
 997         *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
 998         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
 999         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1000         *(p)++ = (0x80 | (c & 0x3F));           \
1001     else                                        \
1002       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1003   } while (0)
1004
1005
1006 /* Return the character code of character whose multibyte form is at
1007    P, and advance P to the end of the multibyte form.  This is like
1008    STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.  */
1009
1010 #define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
1011   (!((p)[0] & 0x80)                                             \
1012    ? *(p)++                                                     \
1013    : ! ((p)[0] & 0x20)                                          \
1014    ? ((p) += 2,                                                 \
1015       ((((p)[-2] & 0x1F) << 6)                                  \
1016        | ((p)[-1] & 0x3F)                                       \
1017        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
1018    : ! ((p)[0] & 0x10)                                          \
1019    ? ((p) += 3,                                                 \
1020       ((((p)[-3] & 0x0F) << 12)                                 \
1021        | (((p)[-2] & 0x3F) << 6)                                \
1022        | ((p)[-1] & 0x3F)))                                     \
1023    : ! ((p)[0] & 0x08)                                          \
1024    ? ((p) += 4,                                                 \
1025       ((((p)[-4] & 0xF) << 18)                                  \
1026        | (((p)[-3] & 0x3F) << 12)                               \
1027        | (((p)[-2] & 0x3F) << 6)                                \
1028        | ((p)[-1] & 0x3F)))                                     \
1029    : ((p) += 5,                                                 \
1030       ((((p)[-4] & 0x3F) << 18)                                 \
1031        | (((p)[-3] & 0x3F) << 12)                               \
1032        | (((p)[-2] & 0x3F) << 6)                                \
1033        | ((p)[-1] & 0x3F))))
1034
1035
1036 static void
1037 coding_set_source (coding)
1038      struct coding_system *coding;
1039 {
1040   if (BUFFERP (coding->src_object))
1041     {
1042       struct buffer *buf = XBUFFER (coding->src_object);
1043
1044       if (coding->src_pos < 0)
1045         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1046       else
1047         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1048     }
1049   else if (STRINGP (coding->src_object))
1050     {
1051       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1052     }
1053   else
1054     /* Otherwise, the source is C string and is never relocated
1055        automatically.  Thus we don't have to update anything.  */
1056     ;
1057 }
1058
1059 static void
1060 coding_set_destination (coding)
1061      struct coding_system *coding;
1062 {
1063   if (BUFFERP (coding->dst_object))
1064     {
1065       if (coding->src_pos < 0)
1066         {
1067           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1068           coding->dst_bytes = (GAP_END_ADDR
1069                                - (coding->src_bytes - coding->consumed)
1070                                - coding->destination);
1071         }
1072       else
1073         {
1074           /* We are sure that coding->dst_pos_byte is before the gap
1075              of the buffer. */
1076           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1077                                  + coding->dst_pos_byte - BEG_BYTE);
1078           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1079                                - coding->destination);
1080         }
1081     }
1082   else
1083     /* Otherwise, the destination is C string and is never relocated
1084        automatically.  Thus we don't have to update anything.  */
1085     ;
1086 }
1087
1088
1089 static void
1090 coding_alloc_by_realloc (coding, bytes)
1091      struct coding_system *coding;
1092      EMACS_INT bytes;
1093 {
1094   coding->destination = (unsigned char *) xrealloc (coding->destination,
1095                                                     coding->dst_bytes + bytes);
1096   coding->dst_bytes += bytes;
1097 }
1098
1099 static void
1100 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1101      struct coding_system *coding;
1102      EMACS_INT gap_head_used, bytes;
1103 {
1104   if (EQ (coding->src_object, coding->dst_object))
1105     {
1106       /* The gap may contain the produced data at the head and not-yet
1107          consumed data at the tail.  To preserve those data, we at
1108          first make the gap size to zero, then increase the gap
1109          size.  */
1110       EMACS_INT add = GAP_SIZE;
1111
1112       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1113       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1114       make_gap (bytes);
1115       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1116       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1117     }
1118   else
1119     {
1120       Lisp_Object this_buffer;
1121
1122       this_buffer = Fcurrent_buffer ();
1123       set_buffer_internal (XBUFFER (coding->dst_object));
1124       make_gap (bytes);
1125       set_buffer_internal (XBUFFER (this_buffer));
1126     }
1127 }
1128
1129
1130 static unsigned char *
1131 alloc_destination (coding, nbytes, dst)
1132      struct coding_system *coding;
1133      EMACS_INT nbytes;
1134      unsigned char *dst;
1135 {
1136   EMACS_INT offset = dst - coding->destination;
1137
1138   if (BUFFERP (coding->dst_object))
1139     {
1140       struct buffer *buf = XBUFFER (coding->dst_object);
1141
1142       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1143     }
1144   else
1145     coding_alloc_by_realloc (coding, nbytes);
1146   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1147   coding_set_destination (coding);
1148   dst = coding->destination + offset;
1149   return dst;
1150 }
1151
1152 /** Macros for annotations.  */
1153
1154 /* Maximum length of annotation data (sum of annotations for
1155    composition and charset).  */
1156 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1157
1158 /* An annotation data is stored in the array coding->charbuf in this
1159    format:
1160      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1161    LENGTH is the number of elements in the annotation.
1162    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1163    NCHARS is the number of characters in the text annotated.
1164
1165    The format of the following elements depend on ANNOTATION_MASK.
1166
1167    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1168    follows:
1169      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1170    METHOD is one of enum composition_method.
1171    Optionnal COMPOSITION-COMPONENTS are characters and composition
1172    rules.
1173
1174    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1175    follows.  */
1176
1177 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1178   do {                                                  \
1179     *(buf)++ = -(len);                                  \
1180     *(buf)++ = (mask);                                  \
1181     *(buf)++ = (nchars);                                \
1182     coding->annotated = 1;                              \
1183   } while (0);
1184
1185 #define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
1186   do {                                                                      \
1187     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1188     *buf++ = method;                                                        \
1189   } while (0)
1190
1191
1192 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1193   do {                                                                  \
1194     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1195     *buf++ = id;                                                        \
1196   } while (0)
1197
1198 \f
1199 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1200
1201
1202
1203 \f
1204 /*** 3. UTF-8 ***/
1205
1206 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1207    Check if a text is encoded in UTF-8.  If it is, return 1, else
1208    return 0.  */
1209
1210 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1211 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1212 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1213 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1214 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1215 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1216
1217 static int
1218 detect_coding_utf_8 (coding, detect_info)
1219      struct coding_system *coding;
1220      struct coding_detection_info *detect_info;
1221 {
1222   const unsigned char *src = coding->source, *src_base;
1223   const unsigned char *src_end = coding->source + coding->src_bytes;
1224   int multibytep = coding->src_multibyte;
1225   int consumed_chars = 0;
1226   int found = 0;
1227
1228   detect_info->checked |= CATEGORY_MASK_UTF_8;
1229   /* A coding system of this category is always ASCII compatible.  */
1230   src += coding->head_ascii;
1231
1232   while (1)
1233     {
1234       int c, c1, c2, c3, c4;
1235
1236       src_base = src;
1237       ONE_MORE_BYTE (c);
1238       if (c < 0 || UTF_8_1_OCTET_P (c))
1239         continue;
1240       ONE_MORE_BYTE (c1);
1241       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1242         break;
1243       if (UTF_8_2_OCTET_LEADING_P (c))
1244         {
1245           found = CATEGORY_MASK_UTF_8;
1246           continue;
1247         }
1248       ONE_MORE_BYTE (c2);
1249       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1250         break;
1251       if (UTF_8_3_OCTET_LEADING_P (c))
1252         {
1253           found = CATEGORY_MASK_UTF_8;
1254           continue;
1255         }
1256       ONE_MORE_BYTE (c3);
1257       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1258         break;
1259       if (UTF_8_4_OCTET_LEADING_P (c))
1260         {
1261           found = CATEGORY_MASK_UTF_8;
1262           continue;
1263         }
1264       ONE_MORE_BYTE (c4);
1265       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1266         break;
1267       if (UTF_8_5_OCTET_LEADING_P (c))
1268         {
1269           found = CATEGORY_MASK_UTF_8;
1270           continue;
1271         }
1272       break;
1273     }
1274   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1275   return 0;
1276
1277  no_more_source:
1278   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1279     {
1280       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1281       return 0;
1282     }
1283   detect_info->found |= found;
1284   return 1;
1285 }
1286
1287
1288 static void
1289 decode_coding_utf_8 (coding)
1290      struct coding_system *coding;
1291 {
1292   const unsigned char *src = coding->source + coding->consumed;
1293   const unsigned char *src_end = coding->source + coding->src_bytes;
1294   const unsigned char *src_base;
1295   int *charbuf = coding->charbuf + coding->charbuf_used;
1296   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1297   int consumed_chars = 0, consumed_chars_base;
1298   int multibytep = coding->src_multibyte;
1299   Lisp_Object attr, charset_list;
1300   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1301   int byte_after_cr = -1;
1302
1303   CODING_GET_INFO (coding, attr, charset_list);
1304
1305   while (1)
1306     {
1307       int c, c1, c2, c3, c4, c5;
1308
1309       src_base = src;
1310       consumed_chars_base = consumed_chars;
1311
1312       if (charbuf >= charbuf_end)
1313         break;
1314
1315       if (byte_after_cr >= 0)
1316         c1 = byte_after_cr, byte_after_cr = -1;
1317       else
1318         ONE_MORE_BYTE (c1);
1319       if (c1 < 0)
1320         {
1321           c = - c1;
1322         }
1323       else if (UTF_8_1_OCTET_P(c1))
1324         {
1325           if (eol_crlf && c1 == '\r')
1326             ONE_MORE_BYTE (byte_after_cr);
1327           c = c1;
1328         }
1329       else
1330         {
1331           ONE_MORE_BYTE (c2);
1332           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1333             goto invalid_code;
1334           if (UTF_8_2_OCTET_LEADING_P (c1))
1335             {
1336               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1337               /* Reject overlong sequences here and below.  Encoders
1338                  producing them are incorrect, they can be misleading,
1339                  and they mess up read/write invariance.  */
1340               if (c < 128)
1341                 goto invalid_code;
1342             }
1343           else
1344             {
1345               ONE_MORE_BYTE (c3);
1346               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1347                 goto invalid_code;
1348               if (UTF_8_3_OCTET_LEADING_P (c1))
1349                 {
1350                   c = (((c1 & 0xF) << 12)
1351                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1352                   if (c < 0x800
1353                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1354                     goto invalid_code;
1355                 }
1356               else
1357                 {
1358                   ONE_MORE_BYTE (c4);
1359                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1360                     goto invalid_code;
1361                   if (UTF_8_4_OCTET_LEADING_P (c1))
1362                     {
1363                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1364                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1365                     if (c < 0x10000)
1366                       goto invalid_code;
1367                     }
1368                   else
1369                     {
1370                       ONE_MORE_BYTE (c5);
1371                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1372                         goto invalid_code;
1373                       if (UTF_8_5_OCTET_LEADING_P (c1))
1374                         {
1375                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1376                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1377                                | (c5 & 0x3F));
1378                           if ((c > MAX_CHAR) || (c < 0x200000))
1379                             goto invalid_code;
1380                         }
1381                       else
1382                         goto invalid_code;
1383                     }
1384                 }
1385             }
1386         }
1387
1388       *charbuf++ = c;
1389       continue;
1390
1391     invalid_code:
1392       src = src_base;
1393       consumed_chars = consumed_chars_base;
1394       ONE_MORE_BYTE (c);
1395       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1396       coding->errors++;
1397     }
1398
1399  no_more_source:
1400   coding->consumed_char += consumed_chars_base;
1401   coding->consumed = src_base - coding->source;
1402   coding->charbuf_used = charbuf - coding->charbuf;
1403 }
1404
1405
1406 static int
1407 encode_coding_utf_8 (coding)
1408      struct coding_system *coding;
1409 {
1410   int multibytep = coding->dst_multibyte;
1411   int *charbuf = coding->charbuf;
1412   int *charbuf_end = charbuf + coding->charbuf_used;
1413   unsigned char *dst = coding->destination + coding->produced;
1414   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1415   int produced_chars = 0;
1416   int c;
1417
1418   if (multibytep)
1419     {
1420       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1421
1422       while (charbuf < charbuf_end)
1423         {
1424           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1425
1426           ASSURE_DESTINATION (safe_room);
1427           c = *charbuf++;
1428           if (CHAR_BYTE8_P (c))
1429             {
1430               c = CHAR_TO_BYTE8 (c);
1431               EMIT_ONE_BYTE (c);
1432             }
1433           else
1434             {
1435               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1436               for (p = str; p < pend; p++)
1437                 EMIT_ONE_BYTE (*p);
1438             }
1439         }
1440     }
1441   else
1442     {
1443       int safe_room = MAX_MULTIBYTE_LENGTH;
1444
1445       while (charbuf < charbuf_end)
1446         {
1447           ASSURE_DESTINATION (safe_room);
1448           c = *charbuf++;
1449           if (CHAR_BYTE8_P (c))
1450             *dst++ = CHAR_TO_BYTE8 (c);
1451           else
1452             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1453           produced_chars++;
1454         }
1455     }
1456   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1457   coding->produced_char += produced_chars;
1458   coding->produced = dst - coding->destination;
1459   return 0;
1460 }
1461
1462
1463 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1464    Check if a text is encoded in one of UTF-16 based coding systems.
1465    If it is, return 1, else return 0.  */
1466
1467 #define UTF_16_HIGH_SURROGATE_P(val) \
1468   (((val) & 0xFC00) == 0xD800)
1469
1470 #define UTF_16_LOW_SURROGATE_P(val) \
1471   (((val) & 0xFC00) == 0xDC00)
1472
1473 #define UTF_16_INVALID_P(val)   \
1474   (((val) == 0xFFFE)            \
1475    || ((val) == 0xFFFF)         \
1476    || UTF_16_LOW_SURROGATE_P (val))
1477
1478
1479 static int
1480 detect_coding_utf_16 (coding, detect_info)
1481      struct coding_system *coding;
1482      struct coding_detection_info *detect_info;
1483 {
1484   const unsigned char *src = coding->source, *src_base = src;
1485   const unsigned char *src_end = coding->source + coding->src_bytes;
1486   int multibytep = coding->src_multibyte;
1487   int consumed_chars = 0;
1488   int c1, c2;
1489
1490   detect_info->checked |= CATEGORY_MASK_UTF_16;
1491   if (coding->mode & CODING_MODE_LAST_BLOCK
1492       && (coding->src_chars & 1))
1493     {
1494       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1495       return 0;
1496     }
1497
1498   ONE_MORE_BYTE (c1);
1499   ONE_MORE_BYTE (c2);
1500   if ((c1 == 0xFF) && (c2 == 0xFE))
1501     {
1502       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1503                              | CATEGORY_MASK_UTF_16_AUTO);
1504       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1505                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1506                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1507     }
1508   else if ((c1 == 0xFE) && (c2 == 0xFF))
1509     {
1510       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1511                              | CATEGORY_MASK_UTF_16_AUTO);
1512       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1513                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1514                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1515     }
1516   else if (c1 >= 0 && c2 >= 0)
1517     {
1518       detect_info->rejected
1519         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1520     }
1521  no_more_source:
1522   return 1;
1523 }
1524
1525 static void
1526 decode_coding_utf_16 (coding)
1527      struct coding_system *coding;
1528 {
1529   const unsigned char *src = coding->source + coding->consumed;
1530   const unsigned char *src_end = coding->source + coding->src_bytes;
1531   const unsigned char *src_base;
1532   int *charbuf = coding->charbuf + coding->charbuf_used;
1533   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1534   int consumed_chars = 0, consumed_chars_base;
1535   int multibytep = coding->src_multibyte;
1536   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1537   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1538   int surrogate = CODING_UTF_16_SURROGATE (coding);
1539   Lisp_Object attr, charset_list;
1540   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1541   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1542
1543   CODING_GET_INFO (coding, attr, charset_list);
1544
1545   if (bom == utf_16_with_bom)
1546     {
1547       int c, c1, c2;
1548
1549       src_base = src;
1550       ONE_MORE_BYTE (c1);
1551       ONE_MORE_BYTE (c2);
1552       c = (c1 << 8) | c2;
1553
1554       if (endian == utf_16_big_endian
1555           ? c != 0xFEFF : c != 0xFFFE)
1556         {
1557           /* The first two bytes are not BOM.  Treat them as bytes
1558              for a normal character.  */
1559           src = src_base;
1560           coding->errors++;
1561         }
1562       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1563     }
1564   else if (bom == utf_16_detect_bom)
1565     {
1566       /* We have already tried to detect BOM and failed in
1567          detect_coding.  */
1568       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1569     }
1570
1571   while (1)
1572     {
1573       int c, c1, c2;
1574
1575       src_base = src;
1576       consumed_chars_base = consumed_chars;
1577
1578       if (charbuf + 2 >= charbuf_end)
1579         break;
1580
1581       if (byte_after_cr1 >= 0)
1582         c1 = byte_after_cr1, byte_after_cr1 = -1;
1583       else
1584         ONE_MORE_BYTE (c1);
1585       if (c1 < 0)
1586         {
1587           *charbuf++ = -c1;
1588           continue;
1589         }
1590       if (byte_after_cr2 >= 0)
1591         c2 = byte_after_cr2, byte_after_cr2 = -1;
1592       else
1593         ONE_MORE_BYTE (c2);
1594       if (c2 < 0)
1595         {
1596           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1597           *charbuf++ = -c2;
1598           continue;
1599         }
1600       c = (endian == utf_16_big_endian
1601            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1602
1603       if (surrogate)
1604         {
1605           if (! UTF_16_LOW_SURROGATE_P (c))
1606             {
1607               if (endian == utf_16_big_endian)
1608                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1609               else
1610                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1611               *charbuf++ = c1;
1612               *charbuf++ = c2;
1613               coding->errors++;
1614               if (UTF_16_HIGH_SURROGATE_P (c))
1615                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1616               else
1617                 *charbuf++ = c;
1618             }
1619           else
1620             {
1621               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1622               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1623               *charbuf++ = 0x10000 + c;
1624             }
1625         }
1626       else
1627         {
1628           if (UTF_16_HIGH_SURROGATE_P (c))
1629             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1630           else
1631             {
1632               if (eol_crlf && c == '\r')
1633                 {
1634                   ONE_MORE_BYTE (byte_after_cr1);
1635                   ONE_MORE_BYTE (byte_after_cr2);
1636                 }
1637               *charbuf++ = c;
1638             }
1639         }
1640     }
1641
1642  no_more_source:
1643   coding->consumed_char += consumed_chars_base;
1644   coding->consumed = src_base - coding->source;
1645   coding->charbuf_used = charbuf - coding->charbuf;
1646 }
1647
1648 static int
1649 encode_coding_utf_16 (coding)
1650      struct coding_system *coding;
1651 {
1652   int multibytep = coding->dst_multibyte;
1653   int *charbuf = coding->charbuf;
1654   int *charbuf_end = charbuf + coding->charbuf_used;
1655   unsigned char *dst = coding->destination + coding->produced;
1656   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1657   int safe_room = 8;
1658   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1659   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1660   int produced_chars = 0;
1661   Lisp_Object attrs, charset_list;
1662   int c;
1663
1664   CODING_GET_INFO (coding, attrs, charset_list);
1665
1666   if (bom != utf_16_without_bom)
1667     {
1668       ASSURE_DESTINATION (safe_room);
1669       if (big_endian)
1670         EMIT_TWO_BYTES (0xFE, 0xFF);
1671       else
1672         EMIT_TWO_BYTES (0xFF, 0xFE);
1673       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1674     }
1675
1676   while (charbuf < charbuf_end)
1677     {
1678       ASSURE_DESTINATION (safe_room);
1679       c = *charbuf++;
1680       if (c >= MAX_UNICODE_CHAR)
1681         c = coding->default_char;
1682
1683       if (c < 0x10000)
1684         {
1685           if (big_endian)
1686             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1687           else
1688             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1689         }
1690       else
1691         {
1692           int c1, c2;
1693
1694           c -= 0x10000;
1695           c1 = (c >> 10) + 0xD800;
1696           c2 = (c & 0x3FF) + 0xDC00;
1697           if (big_endian)
1698             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1699           else
1700             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1701         }
1702     }
1703   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1704   coding->produced = dst - coding->destination;
1705   coding->produced_char += produced_chars;
1706   return 0;
1707 }
1708
1709 \f
1710 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1711
1712 /* Emacs' internal format for representation of multiple character
1713    sets is a kind of multi-byte encoding, i.e. characters are
1714    represented by variable-length sequences of one-byte codes.
1715
1716    ASCII characters and control characters (e.g. `tab', `newline') are
1717    represented by one-byte sequences which are their ASCII codes, in
1718    the range 0x00 through 0x7F.
1719
1720    8-bit characters of the range 0x80..0x9F are represented by
1721    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1722    code + 0x20).
1723
1724    8-bit characters of the range 0xA0..0xFF are represented by
1725    one-byte sequences which are their 8-bit code.
1726
1727    The other characters are represented by a sequence of `base
1728    leading-code', optional `extended leading-code', and one or two
1729    `position-code's.  The length of the sequence is determined by the
1730    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1731    whereas extended leading-code and position-code take the range 0xA0
1732    through 0xFF.  See `charset.h' for more details about leading-code
1733    and position-code.
1734
1735    --- CODE RANGE of Emacs' internal format ---
1736    character set        range
1737    -------------        -----
1738    ascii                0x00..0x7F
1739    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1740    eight-bit-graphic    0xA0..0xBF
1741    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1742    ---------------------------------------------
1743
1744    As this is the internal character representation, the format is
1745    usually not used externally (i.e. in a file or in a data sent to a
1746    process).  But, it is possible to have a text externally in this
1747    format (i.e. by encoding by the coding system `emacs-mule').
1748
1749    In that case, a sequence of one-byte codes has a slightly different
1750    form.
1751
1752    At first, all characters in eight-bit-control are represented by
1753    one-byte sequences which are their 8-bit code.
1754
1755    Next, character composition data are represented by the byte
1756    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1757    where,
1758         METHOD is 0xF0 plus one of composition method (enum
1759         composition_method),
1760
1761         BYTES is 0xA0 plus a byte length of this composition data,
1762
1763         CHARS is 0x20 plus a number of characters composed by this
1764         data,
1765
1766         COMPONENTs are characters of multibye form or composition
1767         rules encoded by two-byte of ASCII codes.
1768
1769    In addition, for backward compatibility, the following formats are
1770    also recognized as composition data on decoding.
1771
1772    0x80 MSEQ ...
1773    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1774
1775    Here,
1776         MSEQ is a multibyte form but in these special format:
1777           ASCII: 0xA0 ASCII_CODE+0x80,
1778           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1779         RULE is a one byte code of the range 0xA0..0xF0 that
1780         represents a composition rule.
1781   */
1782
1783 char emacs_mule_bytes[256];
1784
1785 int
1786 emacs_mule_char (coding, src, nbytes, nchars, id)
1787      struct coding_system *coding;
1788      const unsigned char *src;
1789      int *nbytes, *nchars, *id;
1790 {
1791   const unsigned char *src_end = coding->source + coding->src_bytes;
1792   const unsigned char *src_base = src;
1793   int multibytep = coding->src_multibyte;
1794   struct charset *charset;
1795   unsigned code;
1796   int c;
1797   int consumed_chars = 0;
1798
1799   ONE_MORE_BYTE (c);
1800   if (c < 0)
1801     {
1802       c = -c;
1803       charset = emacs_mule_charset[0];
1804     }
1805   else
1806     {
1807       if (c >= 0xA0)
1808         {
1809           /* Old style component character of a composition.  */
1810           if (c == 0xA0)
1811             {
1812               ONE_MORE_BYTE (c);
1813               c -= 0x80;
1814             }
1815           else
1816             c -= 0x20;
1817         }
1818
1819       switch (emacs_mule_bytes[c])
1820         {
1821         case 2:
1822           if (! (charset = emacs_mule_charset[c]))
1823             goto invalid_code;
1824           ONE_MORE_BYTE (c);
1825           if (c < 0xA0)
1826             goto invalid_code;
1827           code = c & 0x7F;
1828           break;
1829
1830         case 3:
1831           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1832               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1833             {
1834               ONE_MORE_BYTE (c);
1835               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1836                 goto invalid_code;
1837               ONE_MORE_BYTE (c);
1838               if (c < 0xA0)
1839                 goto invalid_code;
1840               code = c & 0x7F;
1841             }
1842           else
1843             {
1844               if (! (charset = emacs_mule_charset[c]))
1845                 goto invalid_code;
1846               ONE_MORE_BYTE (c);
1847               if (c < 0xA0)
1848                 goto invalid_code;
1849               code = (c & 0x7F) << 8;
1850               ONE_MORE_BYTE (c);
1851               if (c < 0xA0)
1852                 goto invalid_code;
1853               code |= c & 0x7F;
1854             }
1855           break;
1856
1857         case 4:
1858           ONE_MORE_BYTE (c);
1859           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1860             goto invalid_code;
1861           ONE_MORE_BYTE (c);
1862           if (c < 0xA0)
1863             goto invalid_code;
1864           code = (c & 0x7F) << 8;
1865           ONE_MORE_BYTE (c);
1866           if (c < 0xA0)
1867             goto invalid_code;
1868           code |= c & 0x7F;
1869           break;
1870
1871         case 1:
1872           code = c;
1873           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1874                                      ? charset_ascii : charset_eight_bit);
1875           break;
1876
1877         default:
1878           abort ();
1879         }
1880       c = DECODE_CHAR (charset, code);
1881       if (c < 0)
1882         goto invalid_code;
1883     }
1884   *nbytes = src - src_base;
1885   *nchars = consumed_chars;
1886   if (id)
1887     *id = charset->id;
1888   return c;
1889
1890  no_more_source:
1891   return -2;
1892
1893  invalid_code:
1894   return -1;
1895 }
1896
1897
1898 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1899    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1900    else return 0.  */
1901
1902 static int
1903 detect_coding_emacs_mule (coding, detect_info)
1904      struct coding_system *coding;
1905      struct coding_detection_info *detect_info;
1906 {
1907   const unsigned char *src = coding->source, *src_base;
1908   const unsigned char *src_end = coding->source + coding->src_bytes;
1909   int multibytep = coding->src_multibyte;
1910   int consumed_chars = 0;
1911   int c;
1912   int found = 0;
1913
1914   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1915   /* A coding system of this category is always ASCII compatible.  */
1916   src += coding->head_ascii;
1917
1918   while (1)
1919     {
1920       src_base = src;
1921       ONE_MORE_BYTE (c);
1922       if (c < 0)
1923         continue;
1924       if (c == 0x80)
1925         {
1926           /* Perhaps the start of composite character.  We simple skip
1927              it because analyzing it is too heavy for detecting.  But,
1928              at least, we check that the composite character
1929              constitues of more than 4 bytes.  */
1930           const unsigned char *src_base;
1931
1932         repeat:
1933           src_base = src;
1934           do
1935             {
1936               ONE_MORE_BYTE (c);
1937             }
1938           while (c >= 0xA0);
1939
1940           if (src - src_base <= 4)
1941             break;
1942           found = CATEGORY_MASK_EMACS_MULE;
1943           if (c == 0x80)
1944             goto repeat;
1945         }
1946
1947       if (c < 0x80)
1948         {
1949           if (c < 0x20
1950               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1951             break;
1952         }
1953       else
1954         {
1955           int more_bytes = emacs_mule_bytes[*src_base] - 1;
1956
1957           while (more_bytes > 0)
1958             {
1959               ONE_MORE_BYTE (c);
1960               if (c < 0xA0)
1961                 {
1962                   src--;        /* Unread the last byte.  */
1963                   break;
1964                 }
1965               more_bytes--;
1966             }
1967           if (more_bytes != 0)
1968             break;
1969           found = CATEGORY_MASK_EMACS_MULE;
1970         }
1971     }
1972   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1973   return 0;
1974
1975  no_more_source:
1976   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1977     {
1978       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1979       return 0;
1980     }
1981   detect_info->found |= found;
1982   return 1;
1983 }
1984
1985
1986 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1987
1988 /* Decode a character represented as a component of composition
1989    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1990    update SRC to the head of next character (or an encoded composition
1991    rule).  If SRC doesn't points a composition component, set C to -1.
1992    If SRC points an invalid byte sequence, global exit by a return
1993    value 0.  */
1994
1995 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1996   do                                                            \
1997     {                                                           \
1998       int c;                                                    \
1999       int nbytes, nchars;                                       \
2000                                                                 \
2001       if (src == src_end)                                       \
2002         break;                                                  \
2003       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
2004       if (c < 0)                                                \
2005         {                                                       \
2006           if (c == -2)                                          \
2007             break;                                              \
2008           goto invalid_code;                                    \
2009         }                                                       \
2010       *buf++ = c;                                               \
2011       src += nbytes;                                            \
2012       consumed_chars += nchars;                                 \
2013     }                                                           \
2014   while (0)
2015
2016
2017 /* Decode a composition rule represented as a component of composition
2018    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
2019    and increment BUF.  If SRC points an invalid byte sequence, set C
2020    to -1.  */
2021
2022 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
2023   do {                                                  \
2024     int c, gref, nref;                                  \
2025                                                         \
2026     if (src >= src_end)                                 \
2027       goto invalid_code;                                \
2028     ONE_MORE_BYTE_NO_CHECK (c);                         \
2029     c -= 0xA0;                                          \
2030     if (c < 0 || c >= 81)                               \
2031       goto invalid_code;                                \
2032                                                         \
2033     gref = c / 9, nref = c % 9;                         \
2034     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2035   } while (0)
2036
2037
2038 /* Decode a composition rule represented as a component of composition
2039    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
2040    and increment BUF.  If SRC points an invalid byte sequence, set C
2041    to -1.  */
2042
2043 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
2044   do {                                                  \
2045     int gref, nref;                                     \
2046                                                         \
2047     if (src + 1>= src_end)                              \
2048       goto invalid_code;                                \
2049     ONE_MORE_BYTE_NO_CHECK (gref);                      \
2050     gref -= 0x20;                                       \
2051     ONE_MORE_BYTE_NO_CHECK (nref);                      \
2052     nref -= 0x20;                                       \
2053     if (gref < 0 || gref >= 81                          \
2054         || nref < 0 || nref >= 81)                      \
2055       goto invalid_code;                                \
2056     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2057   } while (0)
2058
2059
2060 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
2061   do {                                                                  \
2062     /* Emacs 21 style format.  The first three bytes at SRC are         \
2063        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
2064        the byte length of this composition information, CHARS is the    \
2065        number of characters composed by this composition.  */           \
2066     enum composition_method method = c - 0xF2;                          \
2067     int *charbuf_base = charbuf;                                        \
2068     int consumed_chars_limit;                                           \
2069     int nbytes, nchars;                                                 \
2070                                                                         \
2071     ONE_MORE_BYTE (c);                                                  \
2072     if (c < 0)                                                          \
2073       goto invalid_code;                                                \
2074     nbytes = c - 0xA0;                                                  \
2075     if (nbytes < 3)                                                     \
2076       goto invalid_code;                                                \
2077     ONE_MORE_BYTE (c);                                                  \
2078     if (c < 0)                                                          \
2079       goto invalid_code;                                                \
2080     nchars = c - 0xA0;                                                  \
2081     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2082     consumed_chars_limit = consumed_chars_base + nbytes;                \
2083     if (method != COMPOSITION_RELATIVE)                                 \
2084       {                                                                 \
2085         int i = 0;                                                      \
2086         while (consumed_chars < consumed_chars_limit)                   \
2087           {                                                             \
2088             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
2089               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
2090             else                                                        \
2091               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
2092             i++;                                                        \
2093           }                                                             \
2094         if (consumed_chars < consumed_chars_limit)                      \
2095           goto invalid_code;                                            \
2096         charbuf_base[0] -= i;                                           \
2097       }                                                                 \
2098   } while (0)
2099
2100
2101 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
2102   do {                                                                  \
2103     /* Emacs 20 style format for relative composition.  */              \
2104     /* Store multibyte form of characters to be composed.  */           \
2105     enum composition_method method = COMPOSITION_RELATIVE;              \
2106     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
2107     int *buf = components;                                              \
2108     int i, j;                                                           \
2109                                                                         \
2110     src = src_base;                                                     \
2111     ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
2112     for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++)    \
2113       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
2114     if (i < 2)                                                          \
2115       goto invalid_code;                                                \
2116     ADD_COMPOSITION_DATA (charbuf, i, method);                          \
2117     for (j = 0; j < i; j++)                                             \
2118       *charbuf++ = components[j];                                       \
2119   } while (0)
2120
2121
2122 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
2123   do {                                                          \
2124     /* Emacs 20 style format for rule-base composition.  */     \
2125     /* Store multibyte form of characters to be composed.  */   \
2126     enum composition_method method = COMPOSITION_WITH_RULE;     \
2127     int *charbuf_base = charbuf;                                \
2128     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
2129     int *buf = components;                                      \
2130     int i, j;                                                   \
2131                                                                 \
2132     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
2133     for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2134       {                                                         \
2135         if (*src < 0xA0)                                        \
2136           break;                                                \
2137         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
2138         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
2139       }                                                         \
2140     if (i <= 1 || (buf - components) % 2 == 0)                  \
2141       goto invalid_code;                                        \
2142     if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
2143       goto no_more_source;                                      \
2144     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
2145     i = i * 2 - 1;                                              \
2146     for (j = 0; j < i; j++)                                     \
2147       *charbuf++ = components[j];                               \
2148     charbuf_base[0] -= i;                                       \
2149     for (j = 0; j < i; j += 2)                                  \
2150       *charbuf++ = components[j];                               \
2151   } while (0)
2152
2153
2154 static void
2155 decode_coding_emacs_mule (coding)
2156      struct coding_system *coding;
2157 {
2158   const unsigned char *src = coding->source + coding->consumed;
2159   const unsigned char *src_end = coding->source + coding->src_bytes;
2160   const unsigned char *src_base;
2161   int *charbuf = coding->charbuf + coding->charbuf_used;
2162   int *charbuf_end
2163     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2164   int consumed_chars = 0, consumed_chars_base;
2165   int multibytep = coding->src_multibyte;
2166   Lisp_Object attrs, charset_list;
2167   int char_offset = coding->produced_char;
2168   int last_offset = char_offset;
2169   int last_id = charset_ascii;
2170   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2171   int byte_after_cr = -1;
2172
2173   CODING_GET_INFO (coding, attrs, charset_list);
2174
2175   while (1)
2176     {
2177       int c;
2178
2179       src_base = src;
2180       consumed_chars_base = consumed_chars;
2181
2182       if (charbuf >= charbuf_end)
2183         break;
2184
2185       if (byte_after_cr >= 0)
2186         c = byte_after_cr, byte_after_cr = -1;
2187       else
2188         ONE_MORE_BYTE (c);
2189       if (c < 0)
2190         {
2191           *charbuf++ = -c;
2192           char_offset++;
2193         }
2194       else if (c < 0x80)
2195         {
2196           if (eol_crlf && c == '\r')
2197             ONE_MORE_BYTE (byte_after_cr);
2198           *charbuf++ = c;
2199           char_offset++;
2200         }
2201       else if (c == 0x80)
2202         {
2203           ONE_MORE_BYTE (c);
2204           if (c < 0)
2205             goto invalid_code;
2206           if (c - 0xF2 >= COMPOSITION_RELATIVE
2207               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2208             DECODE_EMACS_MULE_21_COMPOSITION (c);
2209           else if (c < 0xC0)
2210             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2211           else if (c == 0xFF)
2212             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2213           else
2214             goto invalid_code;
2215         }
2216       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2217         {
2218           int nbytes, nchars;
2219           int id;
2220
2221           src = src_base;
2222           consumed_chars = consumed_chars_base;
2223           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2224           if (c < 0)
2225             {
2226               if (c == -2)
2227                 break;
2228               goto invalid_code;
2229             }
2230           if (last_id != id)
2231             {
2232               if (last_id != charset_ascii)
2233                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2234               last_id = id;
2235               last_offset = char_offset;
2236             }
2237           *charbuf++ = c;
2238           src += nbytes;
2239           consumed_chars += nchars;
2240           char_offset++;
2241         }
2242       else
2243         goto invalid_code;
2244       continue;
2245
2246     invalid_code:
2247       src = src_base;
2248       consumed_chars = consumed_chars_base;
2249       ONE_MORE_BYTE (c);
2250       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2251       char_offset++;
2252       coding->errors++;
2253     }
2254
2255  no_more_source:
2256   if (last_id != charset_ascii)
2257     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2258   coding->consumed_char += consumed_chars_base;
2259   coding->consumed = src_base - coding->source;
2260   coding->charbuf_used = charbuf - coding->charbuf;
2261 }
2262
2263
2264 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2265   do {                                          \
2266     if (id < 0xA0)                              \
2267       codes[0] = id, codes[1] = 0;              \
2268     else if (id < 0xE0)                         \
2269       codes[0] = 0x9A, codes[1] = id;           \
2270     else if (id < 0xF0)                         \
2271       codes[0] = 0x9B, codes[1] = id;           \
2272     else if (id < 0xF5)                         \
2273       codes[0] = 0x9C, codes[1] = id;           \
2274     else                                        \
2275       codes[0] = 0x9D, codes[1] = id;           \
2276   } while (0);
2277
2278
2279 static int
2280 encode_coding_emacs_mule (coding)
2281      struct coding_system *coding;
2282 {
2283   int multibytep = coding->dst_multibyte;
2284   int *charbuf = coding->charbuf;
2285   int *charbuf_end = charbuf + coding->charbuf_used;
2286   unsigned char *dst = coding->destination + coding->produced;
2287   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2288   int safe_room = 8;
2289   int produced_chars = 0;
2290   Lisp_Object attrs, charset_list;
2291   int c;
2292   int preferred_charset_id = -1;
2293
2294   CODING_GET_INFO (coding, attrs, charset_list);
2295   if (! EQ (charset_list, Vemacs_mule_charset_list))
2296     {
2297       CODING_ATTR_CHARSET_LIST (attrs)
2298         = charset_list = Vemacs_mule_charset_list;
2299     }
2300
2301   while (charbuf < charbuf_end)
2302     {
2303       ASSURE_DESTINATION (safe_room);
2304       c = *charbuf++;
2305
2306       if (c < 0)
2307         {
2308           /* Handle an annotation.  */
2309           switch (*charbuf)
2310             {
2311             case CODING_ANNOTATE_COMPOSITION_MASK:
2312               /* Not yet implemented.  */
2313               break;
2314             case CODING_ANNOTATE_CHARSET_MASK:
2315               preferred_charset_id = charbuf[3];
2316               if (preferred_charset_id >= 0
2317                   && NILP (Fmemq (make_number (preferred_charset_id),
2318                                   charset_list)))
2319                 preferred_charset_id = -1;
2320               break;
2321             default:
2322               abort ();
2323             }
2324           charbuf += -c - 1;
2325           continue;
2326         }
2327
2328       if (ASCII_CHAR_P (c))
2329         EMIT_ONE_ASCII_BYTE (c);
2330       else if (CHAR_BYTE8_P (c))
2331         {
2332           c = CHAR_TO_BYTE8 (c);
2333           EMIT_ONE_BYTE (c);
2334         }
2335       else
2336         {
2337           struct charset *charset;
2338           unsigned code;
2339           int dimension;
2340           int emacs_mule_id;
2341           unsigned char leading_codes[2];
2342
2343           if (preferred_charset_id >= 0)
2344             {
2345               charset = CHARSET_FROM_ID (preferred_charset_id);
2346               if (! CHAR_CHARSET_P (c, charset))
2347                 charset = char_charset (c, charset_list, NULL);
2348             }
2349           else
2350             charset = char_charset (c, charset_list, &code);
2351           if (! charset)
2352             {
2353               c = coding->default_char;
2354               if (ASCII_CHAR_P (c))
2355                 {
2356                   EMIT_ONE_ASCII_BYTE (c);
2357                   continue;
2358                 }
2359               charset = char_charset (c, charset_list, &code);
2360             }
2361           dimension = CHARSET_DIMENSION (charset);
2362           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2363           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2364           EMIT_ONE_BYTE (leading_codes[0]);
2365           if (leading_codes[1])
2366             EMIT_ONE_BYTE (leading_codes[1]);
2367           if (dimension == 1)
2368             EMIT_ONE_BYTE (code | 0x80);
2369           else
2370             {
2371               code |= 0x8080;
2372               EMIT_ONE_BYTE (code >> 8);
2373               EMIT_ONE_BYTE (code & 0xFF);
2374             }
2375         }
2376     }
2377   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2378   coding->produced_char += produced_chars;
2379   coding->produced = dst - coding->destination;
2380   return 0;
2381 }
2382
2383 \f
2384 /*** 7. ISO2022 handlers ***/
2385
2386 /* The following note describes the coding system ISO2022 briefly.
2387    Since the intention of this note is to help understand the
2388    functions in this file, some parts are NOT ACCURATE or are OVERLY
2389    SIMPLIFIED.  For thorough understanding, please refer to the
2390    original document of ISO2022.  This is equivalent to the standard
2391    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2392
2393    ISO2022 provides many mechanisms to encode several character sets
2394    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2395    is encoded using bytes less than 128.  This may make the encoded
2396    text a little bit longer, but the text passes more easily through
2397    several types of gateway, some of which strip off the MSB (Most
2398    Significant Bit).
2399
2400    There are two kinds of character sets: control character sets and
2401    graphic character sets.  The former contain control characters such
2402    as `newline' and `escape' to provide control functions (control
2403    functions are also provided by escape sequences).  The latter
2404    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2405    two control character sets and many graphic character sets.
2406
2407    Graphic character sets are classified into one of the following
2408    four classes, according to the number of bytes (DIMENSION) and
2409    number of characters in one dimension (CHARS) of the set:
2410    - DIMENSION1_CHARS94
2411    - DIMENSION1_CHARS96
2412    - DIMENSION2_CHARS94
2413    - DIMENSION2_CHARS96
2414
2415    In addition, each character set is assigned an identification tag,
2416    unique for each set, called the "final character" (denoted as <F>
2417    hereafter).  The <F> of each character set is decided by ECMA(*)
2418    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2419    (0x30..0x3F are for private use only).
2420
2421    Note (*): ECMA = European Computer Manufacturers Association
2422
2423    Here are examples of graphic character sets [NAME(<F>)]:
2424         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2425         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2426         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2427         o DIMENSION2_CHARS96 -- none for the moment
2428
2429    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2430         C0 [0x00..0x1F] -- control character plane 0
2431         GL [0x20..0x7F] -- graphic character plane 0
2432         C1 [0x80..0x9F] -- control character plane 1
2433         GR [0xA0..0xFF] -- graphic character plane 1
2434
2435    A control character set is directly designated and invoked to C0 or
2436    C1 by an escape sequence.  The most common case is that:
2437    - ISO646's  control character set is designated/invoked to C0, and
2438    - ISO6429's control character set is designated/invoked to C1,
2439    and usually these designations/invocations are omitted in encoded
2440    text.  In a 7-bit environment, only C0 can be used, and a control
2441    character for C1 is encoded by an appropriate escape sequence to
2442    fit into the environment.  All control characters for C1 are
2443    defined to have corresponding escape sequences.
2444
2445    A graphic character set is at first designated to one of four
2446    graphic registers (G0 through G3), then these graphic registers are
2447    invoked to GL or GR.  These designations and invocations can be
2448    done independently.  The most common case is that G0 is invoked to
2449    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2450    these invocations and designations are omitted in encoded text.
2451    In a 7-bit environment, only GL can be used.
2452
2453    When a graphic character set of CHARS94 is invoked to GL, codes
2454    0x20 and 0x7F of the GL area work as control characters SPACE and
2455    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2456    be used.
2457
2458    There are two ways of invocation: locking-shift and single-shift.
2459    With locking-shift, the invocation lasts until the next different
2460    invocation, whereas with single-shift, the invocation affects the
2461    following character only and doesn't affect the locking-shift
2462    state.  Invocations are done by the following control characters or
2463    escape sequences:
2464
2465    ----------------------------------------------------------------------
2466    abbrev  function                  cntrl escape seq   description
2467    ----------------------------------------------------------------------
2468    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2469    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2470    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2471    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2472    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2473    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2474    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2475    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2476    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2477    ----------------------------------------------------------------------
2478    (*) These are not used by any known coding system.
2479
2480    Control characters for these functions are defined by macros
2481    ISO_CODE_XXX in `coding.h'.
2482
2483    Designations are done by the following escape sequences:
2484    ----------------------------------------------------------------------
2485    escape sequence      description
2486    ----------------------------------------------------------------------
2487    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2488    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2489    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2490    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2491    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2492    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2493    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2494    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2495    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2496    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2497    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2498    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2499    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2500    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2501    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2502    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2503    ----------------------------------------------------------------------
2504
2505    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2506    of dimension 1, chars 94, and final character <F>, etc...
2507
2508    Note (*): Although these designations are not allowed in ISO2022,
2509    Emacs accepts them on decoding, and produces them on encoding
2510    CHARS96 character sets in a coding system which is characterized as
2511    7-bit environment, non-locking-shift, and non-single-shift.
2512
2513    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2514    '(' must be omitted.  We refer to this as "short-form" hereafter.
2515
2516    Now you may notice that there are a lot of ways of encoding the
2517    same multilingual text in ISO2022.  Actually, there exist many
2518    coding systems such as Compound Text (used in X11's inter client
2519    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2520    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2521    localized platforms), and all of these are variants of ISO2022.
2522
2523    In addition to the above, Emacs handles two more kinds of escape
2524    sequences: ISO6429's direction specification and Emacs' private
2525    sequence for specifying character composition.
2526
2527    ISO6429's direction specification takes the following form:
2528         o CSI ']'      -- end of the current direction
2529         o CSI '0' ']'  -- end of the current direction
2530         o CSI '1' ']'  -- start of left-to-right text
2531         o CSI '2' ']'  -- start of right-to-left text
2532    The control character CSI (0x9B: control sequence introducer) is
2533    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2534
2535    Character composition specification takes the following form:
2536         o ESC '0' -- start relative composition
2537         o ESC '1' -- end composition
2538         o ESC '2' -- start rule-base composition (*)
2539         o ESC '3' -- start relative composition with alternate chars  (**)
2540         o ESC '4' -- start rule-base composition with alternate chars  (**)
2541   Since these are not standard escape sequences of any ISO standard,
2542   the use of them with these meanings is restricted to Emacs only.
2543
2544   (*) This form is used only in Emacs 20.7 and older versions,
2545   but newer versions can safely decode it.
2546   (**) This form is used only in Emacs 21.1 and newer versions,
2547   and older versions can't decode it.
2548
2549   Here's a list of example usages of these composition escape
2550   sequences (categorized by `enum composition_method').
2551
2552   COMPOSITION_RELATIVE:
2553         ESC 0 CHAR [ CHAR ] ESC 1
2554   COMPOSITION_WITH_RULE:
2555         ESC 2 CHAR [ RULE CHAR ] ESC 1
2556   COMPOSITION_WITH_ALTCHARS:
2557         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2558   COMPOSITION_WITH_RULE_ALTCHARS:
2559         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2560
2561 enum iso_code_class_type iso_code_class[256];
2562
2563 #define SAFE_CHARSET_P(coding, id)      \
2564   ((id) <= (coding)->max_charset_id     \
2565    && (coding)->safe_charsets[id] >= 0)
2566
2567
2568 #define SHIFT_OUT_OK(category)  \
2569   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2570
2571 static void
2572 setup_iso_safe_charsets (attrs)
2573      Lisp_Object attrs;
2574 {
2575   Lisp_Object charset_list, safe_charsets;
2576   Lisp_Object request;
2577   Lisp_Object reg_usage;
2578   Lisp_Object tail;
2579   int reg94, reg96;
2580   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2581   int max_charset_id;
2582
2583   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2584   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2585       && ! EQ (charset_list, Viso_2022_charset_list))
2586     {
2587       CODING_ATTR_CHARSET_LIST (attrs)
2588         = charset_list = Viso_2022_charset_list;
2589       ASET (attrs, coding_attr_safe_charsets, Qnil);
2590     }
2591
2592   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2593     return;
2594
2595   max_charset_id = 0;
2596   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2597     {
2598       int id = XINT (XCAR (tail));
2599       if (max_charset_id < id)
2600         max_charset_id = id;
2601     }
2602
2603   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2604                                 make_number (255));
2605   request = AREF (attrs, coding_attr_iso_request);
2606   reg_usage = AREF (attrs, coding_attr_iso_usage);
2607   reg94 = XINT (XCAR (reg_usage));
2608   reg96 = XINT (XCDR (reg_usage));
2609
2610   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2611     {
2612       Lisp_Object id;
2613       Lisp_Object reg;
2614       struct charset *charset;
2615
2616       id = XCAR (tail);
2617       charset = CHARSET_FROM_ID (XINT (id));
2618       reg = Fcdr (Fassq (id, request));
2619       if (! NILP (reg))
2620         SSET (safe_charsets, XINT (id), XINT (reg));
2621       else if (charset->iso_chars_96)
2622         {
2623           if (reg96 < 4)
2624             SSET (safe_charsets, XINT (id), reg96);
2625         }
2626       else
2627         {
2628           if (reg94 < 4)
2629             SSET (safe_charsets, XINT (id), reg94);
2630         }
2631     }
2632   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2633 }
2634
2635
2636 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2637    Check if a text is encoded in one of ISO-2022 based codig systems.
2638    If it is, return 1, else return 0.  */
2639
2640 static int
2641 detect_coding_iso_2022 (coding, detect_info)
2642      struct coding_system *coding;
2643      struct coding_detection_info *detect_info;
2644 {
2645   const unsigned char *src = coding->source, *src_base = src;
2646   const unsigned char *src_end = coding->source + coding->src_bytes;
2647   int multibytep = coding->src_multibyte;
2648   int single_shifting = 0;
2649   int id;
2650   int c, c1;
2651   int consumed_chars = 0;
2652   int i;
2653   int rejected = 0;
2654   int found = 0;
2655
2656   detect_info->checked |= CATEGORY_MASK_ISO;
2657
2658   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2659     {
2660       struct coding_system *this = &(coding_categories[i]);
2661       Lisp_Object attrs, val;
2662
2663       attrs = CODING_ID_ATTRS (this->id);
2664       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2665           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2666         setup_iso_safe_charsets (attrs);
2667       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2668       this->max_charset_id = SCHARS (val) - 1;
2669       this->safe_charsets = (char *) SDATA (val);
2670     }
2671
2672   /* A coding system of this category is always ASCII compatible.  */
2673   src += coding->head_ascii;
2674
2675   while (rejected != CATEGORY_MASK_ISO)
2676     {
2677       src_base = src;
2678       ONE_MORE_BYTE (c);
2679       switch (c)
2680         {
2681         case ISO_CODE_ESC:
2682           if (inhibit_iso_escape_detection)
2683             break;
2684           single_shifting = 0;
2685           ONE_MORE_BYTE (c);
2686           if (c >= '(' && c <= '/')
2687             {
2688               /* Designation sequence for a charset of dimension 1.  */
2689               ONE_MORE_BYTE (c1);
2690               if (c1 < ' ' || c1 >= 0x80
2691                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2692                 /* Invalid designation sequence.  Just ignore.  */
2693                 break;
2694             }
2695           else if (c == '$')
2696             {
2697               /* Designation sequence for a charset of dimension 2.  */
2698               ONE_MORE_BYTE (c);
2699               if (c >= '@' && c <= 'B')
2700                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2701                 id = iso_charset_table[1][0][c];
2702               else if (c >= '(' && c <= '/')
2703                 {
2704                   ONE_MORE_BYTE (c1);
2705                   if (c1 < ' ' || c1 >= 0x80
2706                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2707                     /* Invalid designation sequence.  Just ignore.  */
2708                     break;
2709                 }
2710               else
2711                 /* Invalid designation sequence.  Just ignore it.  */
2712                 break;
2713             }
2714           else if (c == 'N' || c == 'O')
2715             {
2716               /* ESC <Fe> for SS2 or SS3.  */
2717               single_shifting = 1;
2718               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2719               break;
2720             }
2721           else if (c >= '0' && c <= '4')
2722             {
2723               /* ESC <Fp> for start/end composition.  */
2724               found |= CATEGORY_MASK_ISO;
2725               break;
2726             }
2727           else
2728             {
2729               /* Invalid escape sequence.  Just ignore it.  */
2730               break;
2731             }
2732
2733           /* We found a valid designation sequence for CHARSET.  */
2734           rejected |= CATEGORY_MASK_ISO_8BIT;
2735           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2736                               id))
2737             found |= CATEGORY_MASK_ISO_7;
2738           else
2739             rejected |= CATEGORY_MASK_ISO_7;
2740           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2741                               id))
2742             found |= CATEGORY_MASK_ISO_7_TIGHT;
2743           else
2744             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2745           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2746                               id))
2747             found |= CATEGORY_MASK_ISO_7_ELSE;
2748           else
2749             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2750           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2751                               id))
2752             found |= CATEGORY_MASK_ISO_8_ELSE;
2753           else
2754             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2755           break;
2756
2757         case ISO_CODE_SO:
2758         case ISO_CODE_SI:
2759           /* Locking shift out/in.  */
2760           if (inhibit_iso_escape_detection)
2761             break;
2762           single_shifting = 0;
2763           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2764           break;
2765
2766         case ISO_CODE_CSI:
2767           /* Control sequence introducer.  */
2768           single_shifting = 0;
2769           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2770           found |= CATEGORY_MASK_ISO_8_ELSE;
2771           goto check_extra_latin;
2772
2773         case ISO_CODE_SS2:
2774         case ISO_CODE_SS3:
2775           /* Single shift.   */
2776           if (inhibit_iso_escape_detection)
2777             break;
2778           single_shifting = 0;
2779           rejected |= CATEGORY_MASK_ISO_7BIT;
2780           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2781               & CODING_ISO_FLAG_SINGLE_SHIFT)
2782             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2783           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2784               & CODING_ISO_FLAG_SINGLE_SHIFT)
2785             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2786           if (single_shifting)
2787             break;
2788           goto check_extra_latin;
2789
2790         default:
2791           if (c < 0)
2792             continue;
2793           if (c < 0x80)
2794             {
2795               single_shifting = 0;
2796               break;
2797             }
2798           if (c >= 0xA0)
2799             {
2800               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2801               found |= CATEGORY_MASK_ISO_8_1;
2802               /* Check the length of succeeding codes of the range
2803                  0xA0..0FF.  If the byte length is even, we include
2804                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2805                  only when we are not single shifting.  */
2806               if (! single_shifting
2807                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2808                 {
2809                   int i = 1;
2810                   while (src < src_end)
2811                     {
2812                       ONE_MORE_BYTE (c);
2813                       if (c < 0xA0)
2814                         break;
2815                       i++;
2816                     }
2817
2818                   if (i & 1 && src < src_end)
2819                     rejected |= CATEGORY_MASK_ISO_8_2;
2820                   else
2821                     found |= CATEGORY_MASK_ISO_8_2;
2822                 }
2823               break;
2824             }
2825         check_extra_latin:
2826           single_shifting = 0;
2827           if (! VECTORP (Vlatin_extra_code_table)
2828               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2829             {
2830               rejected = CATEGORY_MASK_ISO;
2831               break;
2832             }
2833           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2834               & CODING_ISO_FLAG_LATIN_EXTRA)
2835             found |= CATEGORY_MASK_ISO_8_1;
2836           else
2837             rejected |= CATEGORY_MASK_ISO_8_1;
2838           rejected |= CATEGORY_MASK_ISO_8_2;
2839         }
2840     }
2841   detect_info->rejected |= CATEGORY_MASK_ISO;
2842   return 0;
2843
2844  no_more_source:
2845   detect_info->rejected |= rejected;
2846   detect_info->found |= (found & ~rejected);
2847   return 1;
2848 }
2849
2850
2851 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
2852    escape sequence should be kept.  */
2853 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2854   do {                                                                  \
2855     int id, prev;                                                       \
2856                                                                         \
2857     if (final < '0' || final >= 128                                     \
2858         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2859         || !SAFE_CHARSET_P (coding, id))                                \
2860       {                                                                 \
2861         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2862         chars_96 = -1;                                                  \
2863         break;                                                          \
2864       }                                                                 \
2865     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2866     if (id == charset_jisx0201_roman)                                   \
2867       {                                                                 \
2868         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2869           id = charset_ascii;                                           \
2870       }                                                                 \
2871     else if (id == charset_jisx0208_1978)                               \
2872       {                                                                 \
2873         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2874           id = charset_jisx0208;                                        \
2875       }                                                                 \
2876     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2877     /* If there was an invalid designation to REG previously, and this  \
2878        designation is ASCII to REG, we should keep this designation     \
2879        sequence.  */                                                    \
2880     if (prev == -2 && id == charset_ascii)                              \
2881       chars_96 = -1;                                                    \
2882   } while (0)
2883
2884
2885 #define MAYBE_FINISH_COMPOSITION()                              \
2886   do {                                                          \
2887     int i;                                                      \
2888     if (composition_state == COMPOSING_NO)                      \
2889       break;                                                    \
2890     /* It is assured that we have enough room for producing     \
2891        characters stored in the table `components'.  */         \
2892     if (charbuf + component_idx > charbuf_end)                  \
2893       goto no_more_source;                                      \
2894     composition_state = COMPOSING_NO;                           \
2895     if (method == COMPOSITION_RELATIVE                          \
2896         || method == COMPOSITION_WITH_ALTCHARS)                 \
2897       {                                                         \
2898         for (i = 0; i < component_idx; i++)                     \
2899           *charbuf++ = components[i];                           \
2900         char_offset += component_idx;                           \
2901       }                                                         \
2902     else                                                        \
2903       {                                                         \
2904         for (i = 0; i < component_idx; i += 2)                  \
2905           *charbuf++ = components[i];                           \
2906         char_offset += (component_idx / 2) + 1;                 \
2907       }                                                         \
2908   } while (0)
2909
2910
2911 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2912    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2913    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2914    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2915    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2916   */
2917
2918 #define DECODE_COMPOSITION_START(c1)                                    \
2919   do {                                                                  \
2920     if (c1 == '0'                                                       \
2921         && composition_state == COMPOSING_COMPONENT_RULE)               \
2922       {                                                                 \
2923         component_len = component_idx;                                  \
2924         composition_state = COMPOSING_CHAR;                             \
2925       }                                                                 \
2926     else                                                                \
2927       {                                                                 \
2928         const unsigned char *p;                                         \
2929                                                                         \
2930         MAYBE_FINISH_COMPOSITION ();                                    \
2931         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2932           goto no_more_source;                                          \
2933         for (p = src; p < src_end - 1; p++)                             \
2934           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2935             break;                                                      \
2936         if (p == src_end - 1)                                           \
2937           {                                                             \
2938             /* The current composition doesn't end in the current       \
2939                source.  */                                              \
2940             record_conversion_result                                    \
2941               (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
2942             goto no_more_source;                                        \
2943           }                                                             \
2944                                                                         \
2945         /* This is surely the start of a composition.  */               \
2946         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2947                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2948                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2949                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2950         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2951                              : COMPOSING_COMPONENT_CHAR);               \
2952         component_idx = component_len = 0;                              \
2953       }                                                                 \
2954   } while (0)
2955
2956
2957 /* Handle compositoin end sequence ESC 1.  */
2958
2959 #define DECODE_COMPOSITION_END()                                        \
2960   do {                                                                  \
2961     int nchars = (component_len > 0 ? component_idx - component_len     \
2962                   : method == COMPOSITION_RELATIVE ? component_idx      \
2963                   : (component_idx + 1) / 2);                           \
2964     int i;                                                              \
2965     int *saved_charbuf = charbuf;                                       \
2966                                                                         \
2967     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2968     if (method != COMPOSITION_RELATIVE)                                 \
2969       {                                                                 \
2970         if (component_len == 0)                                         \
2971           for (i = 0; i < component_idx; i++)                           \
2972             *charbuf++ = components[i];                                 \
2973         else                                                            \
2974           for (i = 0; i < component_len; i++)                           \
2975             *charbuf++ = components[i];                                 \
2976         *saved_charbuf = saved_charbuf - charbuf;                       \
2977       }                                                                 \
2978     if (method == COMPOSITION_WITH_RULE)                                \
2979       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2980         *charbuf++ = components[i];                                     \
2981     else                                                                \
2982       for (i = component_len; i < component_idx; i++, char_offset++)    \
2983         *charbuf++ = components[i];                                     \
2984     coding->annotated = 1;                                              \
2985     composition_state = COMPOSING_NO;                                   \
2986   } while (0)
2987
2988
2989 /* Decode a composition rule from the byte C1 (and maybe one more byte
2990    from SRC) and store one encoded composition rule in
2991    coding->cmp_data.  */
2992
2993 #define DECODE_COMPOSITION_RULE(c1)                                     \
2994   do {                                                                  \
2995     (c1) -= 32;                                                         \
2996     if (c1 < 81)                /* old format (before ver.21) */        \
2997       {                                                                 \
2998         int gref = (c1) / 9;                                            \
2999         int nref = (c1) % 9;                                            \
3000         if (gref == 4) gref = 10;                                       \
3001         if (nref == 4) nref = 10;                                       \
3002         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
3003       }                                                                 \
3004     else if (c1 < 93)           /* new format (after ver.21) */         \
3005       {                                                                 \
3006         ONE_MORE_BYTE (c2);                                             \
3007         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
3008       }                                                                 \
3009     else                                                                \
3010       c1 = 0;                                                           \
3011   } while (0)
3012
3013
3014 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3015
3016 static void
3017 decode_coding_iso_2022 (coding)
3018      struct coding_system *coding;
3019 {
3020   const unsigned char *src = coding->source + coding->consumed;
3021   const unsigned char *src_end = coding->source + coding->src_bytes;
3022   const unsigned char *src_base;
3023   int *charbuf = coding->charbuf + coding->charbuf_used;
3024   int *charbuf_end
3025     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
3026   int consumed_chars = 0, consumed_chars_base;
3027   int multibytep = coding->src_multibyte;
3028   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3029   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3030   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3031   int charset_id_2, charset_id_3;
3032   struct charset *charset;
3033   int c;
3034   /* For handling composition sequence.  */
3035 #define COMPOSING_NO                    0
3036 #define COMPOSING_CHAR                  1
3037 #define COMPOSING_RULE                  2
3038 #define COMPOSING_COMPONENT_CHAR        3
3039 #define COMPOSING_COMPONENT_RULE        4
3040
3041   int composition_state = COMPOSING_NO;
3042   enum composition_method method;
3043   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3044   int component_idx;
3045   int component_len;
3046   Lisp_Object attrs, charset_list;
3047   int char_offset = coding->produced_char;
3048   int last_offset = char_offset;
3049   int last_id = charset_ascii;
3050   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3051   int byte_after_cr = -1;
3052
3053   CODING_GET_INFO (coding, attrs, charset_list);
3054   setup_iso_safe_charsets (attrs);
3055   /* Charset list may have been changed.  */
3056   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3057   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3058
3059   while (1)
3060     {
3061       int c1, c2;
3062
3063       src_base = src;
3064       consumed_chars_base = consumed_chars;
3065
3066       if (charbuf >= charbuf_end)
3067         break;
3068
3069       if (byte_after_cr >= 0)
3070         c1 = byte_after_cr, byte_after_cr = -1;
3071       else
3072         ONE_MORE_BYTE (c1);
3073       if (c1 < 0)
3074         goto invalid_code;
3075
3076       /* We produce at most one character.  */
3077       switch (iso_code_class [c1])
3078         {
3079         case ISO_0x20_or_0x7F:
3080           if (composition_state != COMPOSING_NO)
3081             {
3082               if (composition_state == COMPOSING_RULE
3083                   || composition_state == COMPOSING_COMPONENT_RULE)
3084                 {
3085                   DECODE_COMPOSITION_RULE (c1);
3086                   components[component_idx++] = c1;
3087                   composition_state--;
3088                   continue;
3089                 }
3090             }
3091           if (charset_id_0 < 0
3092               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3093             /* This is SPACE or DEL.  */
3094             charset = CHARSET_FROM_ID (charset_ascii);
3095           else
3096             charset = CHARSET_FROM_ID (charset_id_0);
3097           break;
3098
3099         case ISO_graphic_plane_0:
3100           if (composition_state != COMPOSING_NO)
3101             {
3102               if (composition_state == COMPOSING_RULE
3103                   || composition_state == COMPOSING_COMPONENT_RULE)
3104                 {
3105                   DECODE_COMPOSITION_RULE (c1);
3106                   components[component_idx++] = c1;
3107                   composition_state--;
3108                   continue;
3109                 }
3110             }
3111           if (charset_id_0 < 0)
3112             charset = CHARSET_FROM_ID (charset_ascii);
3113           else
3114             charset = CHARSET_FROM_ID (charset_id_0);
3115           break;
3116
3117         case ISO_0xA0_or_0xFF:
3118           if (charset_id_1 < 0
3119               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3120               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3121             goto invalid_code;
3122           /* This is a graphic character, we fall down ... */
3123
3124         case ISO_graphic_plane_1:
3125           if (charset_id_1 < 0)
3126             goto invalid_code;
3127           charset = CHARSET_FROM_ID (charset_id_1);
3128           break;
3129
3130         case ISO_control_0:
3131           if (eol_crlf && c1 == '\r')
3132             ONE_MORE_BYTE (byte_after_cr);
3133           MAYBE_FINISH_COMPOSITION ();
3134           charset = CHARSET_FROM_ID (charset_ascii);
3135           break;
3136
3137         case ISO_control_1:
3138           MAYBE_FINISH_COMPOSITION ();
3139           goto invalid_code;
3140
3141         case ISO_shift_out:
3142           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3143               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3144             goto invalid_code;
3145           CODING_ISO_INVOCATION (coding, 0) = 1;
3146           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3147           continue;
3148
3149         case ISO_shift_in:
3150           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3151             goto invalid_code;
3152           CODING_ISO_INVOCATION (coding, 0) = 0;
3153           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3154           continue;
3155
3156         case ISO_single_shift_2_7:
3157         case ISO_single_shift_2:
3158           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3159             goto invalid_code;
3160           /* SS2 is handled as an escape sequence of ESC 'N' */
3161           c1 = 'N';
3162           goto label_escape_sequence;
3163
3164         case ISO_single_shift_3:
3165           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3166             goto invalid_code;
3167           /* SS2 is handled as an escape sequence of ESC 'O' */
3168           c1 = 'O';
3169           goto label_escape_sequence;
3170
3171         case ISO_control_sequence_introducer:
3172           /* CSI is handled as an escape sequence of ESC '[' ...  */
3173           c1 = '[';
3174           goto label_escape_sequence;
3175
3176         case ISO_escape:
3177           ONE_MORE_BYTE (c1);
3178         label_escape_sequence:
3179           /* Escape sequences handled here are invocation,
3180              designation, direction specification, and character
3181              composition specification.  */
3182           switch (c1)
3183             {
3184             case '&':           /* revision of following character set */
3185               ONE_MORE_BYTE (c1);
3186               if (!(c1 >= '@' && c1 <= '~'))
3187                 goto invalid_code;
3188               ONE_MORE_BYTE (c1);
3189               if (c1 != ISO_CODE_ESC)
3190                 goto invalid_code;
3191               ONE_MORE_BYTE (c1);
3192               goto label_escape_sequence;
3193
3194             case '$':           /* designation of 2-byte character set */
3195               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3196                 goto invalid_code;
3197               {
3198                 int reg, chars96;
3199
3200                 ONE_MORE_BYTE (c1);
3201                 if (c1 >= '@' && c1 <= 'B')
3202                   {     /* designation of JISX0208.1978, GB2312.1980,
3203                            or JISX0208.1980 */
3204                     reg = 0, chars96 = 0;
3205                   }
3206                 else if (c1 >= 0x28 && c1 <= 0x2B)
3207                   { /* designation of DIMENSION2_CHARS94 character set */
3208                     reg = c1 - 0x28, chars96 = 0;
3209                     ONE_MORE_BYTE (c1);
3210                   }
3211                 else if (c1 >= 0x2C && c1 <= 0x2F)
3212                   { /* designation of DIMENSION2_CHARS96 character set */
3213                     reg = c1 - 0x2C, chars96 = 1;
3214                     ONE_MORE_BYTE (c1);
3215                   }
3216                 else
3217                   goto invalid_code;
3218                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3219                 /* We must update these variables now.  */
3220                 if (reg == 0)
3221                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3222                 else if (reg == 1)
3223                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3224                 if (chars96 < 0)
3225                   goto invalid_code;
3226               }
3227               continue;
3228
3229             case 'n':           /* invocation of locking-shift-2 */
3230               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3231                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3232                 goto invalid_code;
3233               CODING_ISO_INVOCATION (coding, 0) = 2;
3234               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3235               continue;
3236
3237             case 'o':           /* invocation of locking-shift-3 */
3238               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3239                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3240                 goto invalid_code;
3241               CODING_ISO_INVOCATION (coding, 0) = 3;
3242               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3243               continue;
3244
3245             case 'N':           /* invocation of single-shift-2 */
3246               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3247                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3248                 goto invalid_code;
3249               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3250               if (charset_id_2 < 0)
3251                 charset = CHARSET_FROM_ID (charset_ascii);
3252               else
3253                 charset = CHARSET_FROM_ID (charset_id_2);
3254               ONE_MORE_BYTE (c1);
3255               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3256                 goto invalid_code;
3257               break;
3258
3259             case 'O':           /* invocation of single-shift-3 */
3260               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3261                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3262                 goto invalid_code;
3263               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3264               if (charset_id_3 < 0)
3265                 charset = CHARSET_FROM_ID (charset_ascii);
3266               else
3267                 charset = CHARSET_FROM_ID (charset_id_3);
3268               ONE_MORE_BYTE (c1);
3269               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3270                 goto invalid_code;
3271               break;
3272
3273             case '0': case '2': case '3': case '4': /* start composition */
3274               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3275                 goto invalid_code;
3276               DECODE_COMPOSITION_START (c1);
3277               continue;
3278
3279             case '1':           /* end composition */
3280               if (composition_state == COMPOSING_NO)
3281                 goto invalid_code;
3282               DECODE_COMPOSITION_END ();
3283               continue;
3284
3285             case '[':           /* specification of direction */
3286               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3287                 goto invalid_code;
3288               /* For the moment, nested direction is not supported.
3289                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3290                  left-to-right, and nozero means right-to-left.  */
3291               ONE_MORE_BYTE (c1);
3292               switch (c1)
3293                 {
3294                 case ']':       /* end of the current direction */
3295                   coding->mode &= ~CODING_MODE_DIRECTION;
3296
3297                 case '0':       /* end of the current direction */
3298                 case '1':       /* start of left-to-right direction */
3299                   ONE_MORE_BYTE (c1);
3300                   if (c1 == ']')
3301                     coding->mode &= ~CODING_MODE_DIRECTION;
3302                   else
3303                     goto invalid_code;
3304                   break;
3305
3306                 case '2':       /* start of right-to-left direction */
3307                   ONE_MORE_BYTE (c1);
3308                   if (c1 == ']')
3309                     coding->mode |= CODING_MODE_DIRECTION;
3310                   else
3311                     goto invalid_code;
3312                   break;
3313
3314                 default:
3315                   goto invalid_code;
3316                 }
3317               continue;
3318
3319             case '%':
3320               ONE_MORE_BYTE (c1);
3321               if (c1 == '/')
3322                 {
3323                   /* CTEXT extended segment:
3324                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3325                      We keep these bytes as is for the moment.
3326                      They may be decoded by post-read-conversion.  */
3327                   int dim, M, L;
3328                   int size;
3329
3330                   ONE_MORE_BYTE (dim);
3331                   ONE_MORE_BYTE (M);
3332                   ONE_MORE_BYTE (L);
3333                   size = ((M - 128) * 128) + (L - 128);
3334                   if (charbuf + 8 + size > charbuf_end)
3335                     goto break_loop;
3336                   *charbuf++ = ISO_CODE_ESC;
3337                   *charbuf++ = '%';
3338                   *charbuf++ = '/';
3339                   *charbuf++ = dim;
3340                   *charbuf++ = BYTE8_TO_CHAR (M);
3341                   *charbuf++ = BYTE8_TO_CHAR (L);
3342                   while (size-- > 0)
3343                     {
3344                       ONE_MORE_BYTE (c1);
3345                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3346                     }
3347                 }
3348               else if (c1 == 'G')
3349                 {
3350                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3351                      ESC % G --UTF-8-BYTES-- ESC % @
3352                      We keep these bytes as is for the moment.
3353                      They may be decoded by post-read-conversion.  */
3354                   int *p = charbuf;
3355
3356                   if (p + 6 > charbuf_end)
3357                     goto break_loop;
3358                   *p++ = ISO_CODE_ESC;
3359                   *p++ = '%';
3360                   *p++ = 'G';
3361                   while (p < charbuf_end)
3362                     {
3363                       ONE_MORE_BYTE (c1);
3364                       if (c1 == ISO_CODE_ESC
3365                           && src + 1 < src_end
3366                           && src[0] == '%'
3367                           && src[1] == '@')
3368                         {
3369                           src += 2;
3370                           break;
3371                         }
3372                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3373                     }
3374                   if (p + 3 > charbuf_end)
3375                     goto break_loop;
3376                   *p++ = ISO_CODE_ESC;
3377                   *p++ = '%';
3378                   *p++ = '@';
3379                   charbuf = p;
3380                 }
3381               else
3382                 goto invalid_code;
3383               continue;
3384               break;
3385
3386             default:
3387               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3388                 goto invalid_code;
3389               {
3390                 int reg, chars96;
3391
3392                 if (c1 >= 0x28 && c1 <= 0x2B)
3393                   { /* designation of DIMENSION1_CHARS94 character set */
3394                     reg = c1 - 0x28, chars96 = 0;
3395                     ONE_MORE_BYTE (c1);
3396                   }
3397                 else if (c1 >= 0x2C && c1 <= 0x2F)
3398                   { /* designation of DIMENSION1_CHARS96 character set */
3399                     reg = c1 - 0x2C, chars96 = 1;
3400                     ONE_MORE_BYTE (c1);
3401                   }
3402                 else
3403                   goto invalid_code;
3404                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3405                 /* We must update these variables now.  */
3406                 if (reg == 0)
3407                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3408                 else if (reg == 1)
3409                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3410                 if (chars96 < 0)
3411                   goto invalid_code;
3412               }
3413               continue;
3414             }
3415         }
3416
3417       if (charset->id != charset_ascii
3418           && last_id != charset->id)
3419         {
3420           if (last_id != charset_ascii)
3421             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3422           last_id = charset->id;
3423           last_offset = char_offset;
3424         }
3425
3426       /* Now we know CHARSET and 1st position code C1 of a character.
3427          Produce a decoded character while getting 2nd position code
3428          C2 if necessary.  */
3429       c1 &= 0x7F;
3430       if (CHARSET_DIMENSION (charset) > 1)
3431         {
3432           ONE_MORE_BYTE (c2);
3433           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3434             /* C2 is not in a valid range.  */
3435             goto invalid_code;
3436           c1 = (c1 << 8) | (c2 & 0x7F);
3437           if (CHARSET_DIMENSION (charset) > 2)
3438             {
3439               ONE_MORE_BYTE (c2);
3440               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3441                 /* C2 is not in a valid range.  */
3442                 goto invalid_code;
3443               c1 = (c1 << 8) | (c2 & 0x7F);
3444             }
3445         }
3446
3447       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3448       if (c < 0)
3449         {
3450           MAYBE_FINISH_COMPOSITION ();
3451           for (; src_base < src; src_base++, char_offset++)
3452             {
3453               if (ASCII_BYTE_P (*src_base))
3454                 *charbuf++ = *src_base;
3455               else
3456                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3457             }
3458         }
3459       else if (composition_state == COMPOSING_NO)
3460         {
3461           *charbuf++ = c;
3462           char_offset++;
3463         }
3464       else
3465         {
3466           components[component_idx++] = c;
3467           if (method == COMPOSITION_WITH_RULE
3468               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3469                   && composition_state == COMPOSING_COMPONENT_CHAR))
3470             composition_state++;
3471         }
3472       continue;
3473
3474     invalid_code:
3475       MAYBE_FINISH_COMPOSITION ();
3476       src = src_base;
3477       consumed_chars = consumed_chars_base;
3478       ONE_MORE_BYTE (c);
3479       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3480       char_offset++;
3481       coding->errors++;
3482       continue;
3483
3484     break_loop:
3485       break;
3486     }
3487
3488  no_more_source:
3489   if (last_id != charset_ascii)
3490     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3491   coding->consumed_char += consumed_chars_base;
3492   coding->consumed = src_base - coding->source;
3493   coding->charbuf_used = charbuf - coding->charbuf;
3494 }
3495
3496
3497 /* ISO2022 encoding stuff.  */
3498
3499 /*
3500    It is not enough to say just "ISO2022" on encoding, we have to
3501    specify more details.  In Emacs, each coding system of ISO2022
3502    variant has the following specifications:
3503         1. Initial designation to G0 thru G3.
3504         2. Allows short-form designation?
3505         3. ASCII should be designated to G0 before control characters?
3506         4. ASCII should be designated to G0 at end of line?
3507         5. 7-bit environment or 8-bit environment?
3508         6. Use locking-shift?
3509         7. Use Single-shift?
3510    And the following two are only for Japanese:
3511         8. Use ASCII in place of JIS0201-1976-Roman?
3512         9. Use JISX0208-1983 in place of JISX0208-1978?
3513    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3514    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3515    details.
3516 */
3517
3518 /* Produce codes (escape sequence) for designating CHARSET to graphic
3519    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3520    '@', 'A', or 'B' and the coding system CODING allows, produce
3521    designation sequence of short-form.  */
3522
3523 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3524   do {                                                                  \
3525     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3526     char *intermediate_char_94 = "()*+";                                \
3527     char *intermediate_char_96 = ",-./";                                \
3528     int revision = -1;                                                  \
3529     int c;                                                              \
3530                                                                         \
3531     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3532       revision = CHARSET_ISO_REVISION (charset);                        \
3533                                                                         \
3534     if (revision >= 0)                                                  \
3535       {                                                                 \
3536         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3537         EMIT_ONE_BYTE ('@' + revision);                                 \
3538       }                                                                 \
3539     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3540     if (CHARSET_DIMENSION (charset) == 1)                               \
3541       {                                                                 \
3542         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3543           c = intermediate_char_94[reg];                                \
3544         else                                                            \
3545           c = intermediate_char_96[reg];                                \
3546         EMIT_ONE_ASCII_BYTE (c);                                        \
3547       }                                                                 \
3548     else                                                                \
3549       {                                                                 \
3550         EMIT_ONE_ASCII_BYTE ('$');                                      \
3551         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3552           {                                                             \
3553             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3554                 || reg != 0                                             \
3555                 || final_char < '@' || final_char > 'B')                \
3556               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3557           }                                                             \
3558         else                                                            \
3559           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3560       }                                                                 \
3561     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3562                                                                         \
3563     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3564   } while (0)
3565
3566
3567 /* The following two macros produce codes (control character or escape
3568    sequence) for ISO2022 single-shift functions (single-shift-2 and
3569    single-shift-3).  */
3570
3571 #define ENCODE_SINGLE_SHIFT_2                                           \
3572   do {                                                                  \
3573     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3574       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3575     else                                                                \
3576       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3577     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3578   } while (0)
3579
3580
3581 #define ENCODE_SINGLE_SHIFT_3                                           \
3582   do {                                                                  \
3583     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3584       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3585     else                                                                \
3586       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3587     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3588   } while (0)
3589
3590
3591 /* The following four macros produce codes (control character or
3592    escape sequence) for ISO2022 locking-shift functions (shift-in,
3593    shift-out, locking-shift-2, and locking-shift-3).  */
3594
3595 #define ENCODE_SHIFT_IN                                 \
3596   do {                                                  \
3597     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3598     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3599   } while (0)
3600
3601
3602 #define ENCODE_SHIFT_OUT                                \
3603   do {                                                  \
3604     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3605     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3606   } while (0)
3607
3608
3609 #define ENCODE_LOCKING_SHIFT_2                          \
3610   do {                                                  \
3611     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3612     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3613   } while (0)
3614
3615
3616 #define ENCODE_LOCKING_SHIFT_3                          \
3617   do {                                                  \
3618     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3619     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3620   } while (0)
3621
3622
3623 /* Produce codes for a DIMENSION1 character whose character set is
3624    CHARSET and whose position-code is C1.  Designation and invocation
3625    sequences are also produced in advance if necessary.  */
3626
3627 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3628   do {                                                                  \
3629     int id = CHARSET_ID (charset);                                      \
3630                                                                         \
3631     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3632         && id == charset_ascii)                                         \
3633       {                                                                 \
3634         id = charset_jisx0201_roman;                                    \
3635         charset = CHARSET_FROM_ID (id);                                 \
3636       }                                                                 \
3637                                                                         \
3638     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3639       {                                                                 \
3640         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3641           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3642         else                                                            \
3643           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3644         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3645         break;                                                          \
3646       }                                                                 \
3647     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3648       {                                                                 \
3649         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3650         break;                                                          \
3651       }                                                                 \
3652     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3653       {                                                                 \
3654         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3655         break;                                                          \
3656       }                                                                 \
3657     else                                                                \
3658       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3659          must invoke it, or, at first, designate it to some graphic     \
3660          register.  Then repeat the loop to actually produce the        \
3661          character.  */                                                 \
3662       dst = encode_invocation_designation (charset, coding, dst,        \
3663                                            &produced_chars);            \
3664   } while (1)
3665
3666
3667 /* Produce codes for a DIMENSION2 character whose character set is
3668    CHARSET and whose position-codes are C1 and C2.  Designation and
3669    invocation codes are also produced in advance if necessary.  */
3670
3671 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3672   do {                                                                  \
3673     int id = CHARSET_ID (charset);                                      \
3674                                                                         \
3675     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3676         && id == charset_jisx0208)                                      \
3677       {                                                                 \
3678         id = charset_jisx0208_1978;                                     \
3679         charset = CHARSET_FROM_ID (id);                                 \
3680       }                                                                 \
3681                                                                         \
3682     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3683       {                                                                 \
3684         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3685           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3686         else                                                            \
3687           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3688         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3689         break;                                                          \
3690       }                                                                 \
3691     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3692       {                                                                 \
3693         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3694         break;                                                          \
3695       }                                                                 \
3696     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3697       {                                                                 \
3698         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3699         break;                                                          \
3700       }                                                                 \
3701     else                                                                \
3702       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3703          must invoke it, or, at first, designate it to some graphic     \
3704          register.  Then repeat the loop to actually produce the        \
3705          character.  */                                                 \
3706       dst = encode_invocation_designation (charset, coding, dst,        \
3707                                            &produced_chars);            \
3708   } while (1)
3709
3710
3711 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3712   do {                                                                     \
3713     int code = ENCODE_CHAR ((charset),(c));                                \
3714                                                                            \
3715     if (CHARSET_DIMENSION (charset) == 1)                                  \
3716       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3717     else                                                                   \
3718       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3719   } while (0)
3720
3721
3722 /* Produce designation and invocation codes at a place pointed by DST
3723    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3724    Return new DST.  */
3725
3726 unsigned char *
3727 encode_invocation_designation (charset, coding, dst, p_nchars)
3728      struct charset *charset;
3729      struct coding_system *coding;
3730      unsigned char *dst;
3731      int *p_nchars;
3732 {
3733   int multibytep = coding->dst_multibyte;
3734   int produced_chars = *p_nchars;
3735   int reg;                      /* graphic register number */
3736   int id = CHARSET_ID (charset);
3737
3738   /* At first, check designations.  */
3739   for (reg = 0; reg < 4; reg++)
3740     if (id == CODING_ISO_DESIGNATION (coding, reg))
3741       break;
3742
3743   if (reg >= 4)
3744     {
3745       /* CHARSET is not yet designated to any graphic registers.  */
3746       /* At first check the requested designation.  */
3747       reg = CODING_ISO_REQUEST (coding, id);
3748       if (reg < 0)
3749         /* Since CHARSET requests no special designation, designate it
3750            to graphic register 0.  */
3751         reg = 0;
3752
3753       ENCODE_DESIGNATION (charset, reg, coding);
3754     }
3755
3756   if (CODING_ISO_INVOCATION (coding, 0) != reg
3757       && CODING_ISO_INVOCATION (coding, 1) != reg)
3758     {
3759       /* Since the graphic register REG is not invoked to any graphic
3760          planes, invoke it to graphic plane 0.  */
3761       switch (reg)
3762         {
3763         case 0:                 /* graphic register 0 */
3764           ENCODE_SHIFT_IN;
3765           break;
3766
3767         case 1:                 /* graphic register 1 */
3768           ENCODE_SHIFT_OUT;
3769           break;
3770
3771         case 2:                 /* graphic register 2 */
3772           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3773             ENCODE_SINGLE_SHIFT_2;
3774           else
3775             ENCODE_LOCKING_SHIFT_2;
3776           break;
3777
3778         case 3:                 /* graphic register 3 */
3779           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3780             ENCODE_SINGLE_SHIFT_3;
3781           else
3782             ENCODE_LOCKING_SHIFT_3;
3783           break;
3784         }
3785     }
3786
3787   *p_nchars = produced_chars;
3788   return dst;
3789 }
3790
3791 /* The following three macros produce codes for indicating direction
3792    of text.  */
3793 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3794   do {                                                                  \
3795     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3796       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3797     else                                                                \
3798       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3799   } while (0)
3800
3801
3802 #define ENCODE_DIRECTION_R2L()                  \
3803   do {                                          \
3804     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3805     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3806   } while (0)
3807
3808
3809 #define ENCODE_DIRECTION_L2R()                  \
3810   do {                                          \
3811     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3812     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3813   } while (0)
3814
3815
3816 /* Produce codes for designation and invocation to reset the graphic
3817    planes and registers to initial state.  */
3818 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3819   do {                                                                  \
3820     int reg;                                                            \
3821     struct charset *charset;                                            \
3822                                                                         \
3823     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3824       ENCODE_SHIFT_IN;                                                  \
3825     for (reg = 0; reg < 4; reg++)                                       \
3826       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3827           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3828               != CODING_ISO_INITIAL (coding, reg)))                     \
3829         {                                                               \
3830           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3831           ENCODE_DESIGNATION (charset, reg, coding);                    \
3832         }                                                               \
3833   } while (0)
3834
3835
3836 /* Produce designation sequences of charsets in the line started from
3837    SRC to a place pointed by DST, and return updated DST.
3838
3839    If the current block ends before any end-of-line, we may fail to
3840    find all the necessary designations.  */
3841
3842 static unsigned char *
3843 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3844      struct coding_system *coding;
3845      int *charbuf, *charbuf_end;
3846      unsigned char *dst;
3847 {
3848   struct charset *charset;
3849   /* Table of charsets to be designated to each graphic register.  */
3850   int r[4];
3851   int c, found = 0, reg;
3852   int produced_chars = 0;
3853   int multibytep = coding->dst_multibyte;
3854   Lisp_Object attrs;
3855   Lisp_Object charset_list;
3856
3857   attrs = CODING_ID_ATTRS (coding->id);
3858   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3859   if (EQ (charset_list, Qiso_2022))
3860     charset_list = Viso_2022_charset_list;
3861
3862   for (reg = 0; reg < 4; reg++)
3863     r[reg] = -1;
3864
3865   while (found < 4)
3866     {
3867       int id;
3868
3869       c = *charbuf++;
3870       if (c == '\n')
3871         break;
3872       charset = char_charset (c, charset_list, NULL);
3873       id = CHARSET_ID (charset);
3874       reg = CODING_ISO_REQUEST (coding, id);
3875       if (reg >= 0 && r[reg] < 0)
3876         {
3877           found++;
3878           r[reg] = id;
3879         }
3880     }
3881
3882   if (found)
3883     {
3884       for (reg = 0; reg < 4; reg++)
3885         if (r[reg] >= 0
3886             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3887           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3888     }
3889
3890   return dst;
3891 }
3892
3893 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3894
3895 static int
3896 encode_coding_iso_2022 (coding)
3897      struct coding_system *coding;
3898 {
3899   int multibytep = coding->dst_multibyte;
3900   int *charbuf = coding->charbuf;
3901   int *charbuf_end = charbuf + coding->charbuf_used;
3902   unsigned char *dst = coding->destination + coding->produced;
3903   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3904   int safe_room = 16;
3905   int bol_designation
3906     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3907        && CODING_ISO_BOL (coding));
3908   int produced_chars = 0;
3909   Lisp_Object attrs, eol_type, charset_list;
3910   int ascii_compatible;
3911   int c;
3912   int preferred_charset_id = -1;
3913
3914   CODING_GET_INFO (coding, attrs, charset_list);
3915   eol_type = CODING_ID_EOL_TYPE (coding->id);
3916   if (VECTORP (eol_type))
3917     eol_type = Qunix;
3918
3919   setup_iso_safe_charsets (attrs);
3920   /* Charset list may have been changed.  */
3921   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3922   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3923
3924   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3925
3926   while (charbuf < charbuf_end)
3927     {
3928       ASSURE_DESTINATION (safe_room);
3929
3930       if (bol_designation)
3931         {
3932           unsigned char *dst_prev = dst;
3933
3934           /* We have to produce designation sequences if any now.  */
3935           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3936           bol_designation = 0;
3937           /* We are sure that designation sequences are all ASCII bytes.  */
3938           produced_chars += dst - dst_prev;
3939         }
3940
3941       c = *charbuf++;
3942
3943       if (c < 0)
3944         {
3945           /* Handle an annotation.  */
3946           switch (*charbuf)
3947             {
3948             case CODING_ANNOTATE_COMPOSITION_MASK:
3949               /* Not yet implemented.  */
3950               break;
3951             case CODING_ANNOTATE_CHARSET_MASK:
3952               preferred_charset_id = charbuf[2];
3953               if (preferred_charset_id >= 0
3954                   && NILP (Fmemq (make_number (preferred_charset_id),
3955                                   charset_list)))
3956                 preferred_charset_id = -1;
3957               break;
3958             default:
3959               abort ();
3960             }
3961           charbuf += -c - 1;
3962           continue;
3963         }
3964
3965       /* Now encode the character C.  */
3966       if (c < 0x20 || c == 0x7F)
3967         {
3968           if (c == '\n'
3969               || (c == '\r' && EQ (eol_type, Qmac)))
3970             {
3971               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3972                 ENCODE_RESET_PLANE_AND_REGISTER ();
3973               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3974                 {
3975                   int i;
3976
3977                   for (i = 0; i < 4; i++)
3978                     CODING_ISO_DESIGNATION (coding, i)
3979                       = CODING_ISO_INITIAL (coding, i);
3980                 }
3981               bol_designation
3982                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3983             }
3984           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3985             ENCODE_RESET_PLANE_AND_REGISTER ();
3986           EMIT_ONE_ASCII_BYTE (c);
3987         }
3988       else if (ASCII_CHAR_P (c))
3989         {
3990           if (ascii_compatible)
3991             EMIT_ONE_ASCII_BYTE (c);
3992           else
3993             {
3994               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3995               ENCODE_ISO_CHARACTER (charset, c);
3996             }
3997         }
3998       else if (CHAR_BYTE8_P (c))
3999         {
4000           c = CHAR_TO_BYTE8 (c);
4001           EMIT_ONE_BYTE (c);
4002         }
4003       else
4004         {
4005           struct charset *charset;
4006
4007           if (preferred_charset_id >= 0)
4008             {
4009               charset = CHARSET_FROM_ID (preferred_charset_id);
4010               if (! CHAR_CHARSET_P (c, charset))
4011                 charset = char_charset (c, charset_list, NULL);
4012             }
4013           else
4014             charset = char_charset (c, charset_list, NULL);
4015           if (!charset)
4016             {
4017               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4018                 {
4019                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4020                   charset = CHARSET_FROM_ID (charset_ascii);
4021                 }
4022               else
4023                 {
4024                   c = coding->default_char;
4025                   charset = char_charset (c, charset_list, NULL);
4026                 }
4027             }
4028           ENCODE_ISO_CHARACTER (charset, c);
4029         }
4030     }
4031
4032   if (coding->mode & CODING_MODE_LAST_BLOCK
4033       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4034     {
4035       ASSURE_DESTINATION (safe_room);
4036       ENCODE_RESET_PLANE_AND_REGISTER ();
4037     }
4038   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4039   CODING_ISO_BOL (coding) = bol_designation;
4040   coding->produced_char += produced_chars;
4041   coding->produced = dst - coding->destination;
4042   return 0;
4043 }
4044
4045 \f
4046 /*** 8,9. SJIS and BIG5 handlers ***/
4047
4048 /* Although SJIS and BIG5 are not ISO's coding system, they are used
4049    quite widely.  So, for the moment, Emacs supports them in the bare
4050    C code.  But, in the future, they may be supported only by CCL.  */
4051
4052 /* SJIS is a coding system encoding three character sets: ASCII, right
4053    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4054    as is.  A character of charset katakana-jisx0201 is encoded by
4055    "position-code + 0x80".  A character of charset japanese-jisx0208
4056    is encoded in 2-byte but two position-codes are divided and shifted
4057    so that it fit in the range below.
4058
4059    --- CODE RANGE of SJIS ---
4060    (character set)      (range)
4061    ASCII                0x00 .. 0x7F
4062    KATAKANA-JISX0201    0xA0 .. 0xDF
4063    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4064             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4065    -------------------------------
4066
4067 */
4068
4069 /* BIG5 is a coding system encoding two character sets: ASCII and
4070    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4071    character set and is encoded in two-byte.
4072
4073    --- CODE RANGE of BIG5 ---
4074    (character set)      (range)
4075    ASCII                0x00 .. 0x7F
4076    Big5 (1st byte)      0xA1 .. 0xFE
4077         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4078    --------------------------
4079
4080   */
4081
4082 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4083    Check if a text is encoded in SJIS.  If it is, return
4084    CATEGORY_MASK_SJIS, else return 0.  */
4085
4086 static int
4087 detect_coding_sjis (coding, detect_info)
4088      struct coding_system *coding;
4089      struct coding_detection_info *detect_info;
4090 {
4091   const unsigned char *src = coding->source, *src_base;
4092   const unsigned char *src_end = coding->source + coding->src_bytes;
4093   int multibytep = coding->src_multibyte;
4094   int consumed_chars = 0;
4095   int found = 0;
4096   int c;
4097
4098   detect_info->checked |= CATEGORY_MASK_SJIS;
4099   /* A coding system of this category is always ASCII compatible.  */
4100   src += coding->head_ascii;
4101
4102   while (1)
4103     {
4104       src_base = src;
4105       ONE_MORE_BYTE (c);
4106       if (c < 0x80)
4107         continue;
4108       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4109         {
4110           ONE_MORE_BYTE (c);
4111           if (c < 0x40 || c == 0x7F || c > 0xFC)
4112             break;
4113           found = CATEGORY_MASK_SJIS;
4114         }
4115       else if (c >= 0xA0 && c < 0xE0)
4116         found = CATEGORY_MASK_SJIS;
4117       else
4118         break;
4119     }
4120   detect_info->rejected |= CATEGORY_MASK_SJIS;
4121   return 0;
4122
4123  no_more_source:
4124   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4125     {
4126       detect_info->rejected |= CATEGORY_MASK_SJIS;
4127       return 0;
4128     }
4129   detect_info->found |= found;
4130   return 1;
4131 }
4132
4133 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4134    Check if a text is encoded in BIG5.  If it is, return
4135    CATEGORY_MASK_BIG5, else return 0.  */
4136
4137 static int
4138 detect_coding_big5 (coding, detect_info)
4139      struct coding_system *coding;
4140      struct coding_detection_info *detect_info;
4141 {
4142   const unsigned char *src = coding->source, *src_base;
4143   const unsigned char *src_end = coding->source + coding->src_bytes;
4144   int multibytep = coding->src_multibyte;
4145   int consumed_chars = 0;
4146   int found = 0;
4147   int c;
4148
4149   detect_info->checked |= CATEGORY_MASK_BIG5;
4150   /* A coding system of this category is always ASCII compatible.  */
4151   src += coding->head_ascii;
4152
4153   while (1)
4154     {
4155       src_base = src;
4156       ONE_MORE_BYTE (c);
4157       if (c < 0x80)
4158         continue;
4159       if (c >= 0xA1)
4160         {
4161           ONE_MORE_BYTE (c);
4162           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4163             return 0;
4164           found = CATEGORY_MASK_BIG5;
4165         }
4166       else
4167         break;
4168     }
4169   detect_info->rejected |= CATEGORY_MASK_BIG5;
4170   return 0;
4171
4172  no_more_source:
4173   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4174     {
4175       detect_info->rejected |= CATEGORY_MASK_BIG5;
4176       return 0;
4177     }
4178   detect_info->found |= found;
4179   return 1;
4180 }
4181
4182 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4183    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4184
4185 static void
4186 decode_coding_sjis (coding)
4187      struct coding_system *coding;
4188 {
4189   const unsigned char *src = coding->source + coding->consumed;
4190   const unsigned char *src_end = coding->source + coding->src_bytes;
4191   const unsigned char *src_base;
4192   int *charbuf = coding->charbuf + coding->charbuf_used;
4193   int *charbuf_end
4194     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4195   int consumed_chars = 0, consumed_chars_base;
4196   int multibytep = coding->src_multibyte;
4197   struct charset *charset_roman, *charset_kanji, *charset_kana;
4198   struct charset *charset_kanji2;
4199   Lisp_Object attrs, charset_list, val;
4200   int char_offset = coding->produced_char;
4201   int last_offset = char_offset;
4202   int last_id = charset_ascii;
4203   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4204   int byte_after_cr = -1;
4205
4206   CODING_GET_INFO (coding, attrs, charset_list);
4207
4208   val = charset_list;
4209   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4210   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4211   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4212   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4213
4214   while (1)
4215     {
4216       int c, c1;
4217       struct charset *charset;
4218
4219       src_base = src;
4220       consumed_chars_base = consumed_chars;
4221
4222       if (charbuf >= charbuf_end)
4223         break;
4224
4225       if (byte_after_cr >= 0)
4226         c = byte_after_cr, byte_after_cr = -1;
4227       else
4228         ONE_MORE_BYTE (c);
4229       if (c < 0)
4230         goto invalid_code;
4231       if (c < 0x80)
4232         {
4233           if (eol_crlf && c == '\r')
4234             ONE_MORE_BYTE (byte_after_cr);
4235           charset = charset_roman;
4236         }
4237       else if (c == 0x80 || c == 0xA0)
4238         goto invalid_code;
4239       else if (c >= 0xA1 && c <= 0xDF)
4240         {
4241           /* SJIS -> JISX0201-Kana */
4242           c &= 0x7F;
4243           charset = charset_kana;
4244         }
4245       else if (c <= 0xEF)
4246         {
4247           /* SJIS -> JISX0208 */
4248           ONE_MORE_BYTE (c1);
4249           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4250             goto invalid_code;
4251           c = (c << 8) | c1;
4252           SJIS_TO_JIS (c);
4253           charset = charset_kanji;
4254         }
4255       else if (c <= 0xFC && charset_kanji2)
4256         {
4257           /* SJIS -> JISX0213-2 */
4258           ONE_MORE_BYTE (c1);
4259           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4260             goto invalid_code;
4261           c = (c << 8) | c1;
4262           SJIS_TO_JIS2 (c);
4263           charset = charset_kanji2;
4264         }
4265       else
4266         goto invalid_code;
4267       if (charset->id != charset_ascii
4268           && last_id != charset->id)
4269         {
4270           if (last_id != charset_ascii)
4271             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4272           last_id = charset->id;
4273           last_offset = char_offset;
4274         }
4275       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4276       *charbuf++ = c;
4277       char_offset++;
4278       continue;
4279
4280     invalid_code:
4281       src = src_base;
4282       consumed_chars = consumed_chars_base;
4283       ONE_MORE_BYTE (c);
4284       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4285       char_offset++;
4286       coding->errors++;
4287     }
4288
4289  no_more_source:
4290   if (last_id != charset_ascii)
4291     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4292   coding->consumed_char += consumed_chars_base;
4293   coding->consumed = src_base - coding->source;
4294   coding->charbuf_used = charbuf - coding->charbuf;
4295 }
4296
4297 static void
4298 decode_coding_big5 (coding)
4299      struct coding_system *coding;
4300 {
4301   const unsigned char *src = coding->source + coding->consumed;
4302   const unsigned char *src_end = coding->source + coding->src_bytes;
4303   const unsigned char *src_base;
4304   int *charbuf = coding->charbuf + coding->charbuf_used;
4305   int *charbuf_end
4306     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4307   int consumed_chars = 0, consumed_chars_base;
4308   int multibytep = coding->src_multibyte;
4309   struct charset *charset_roman, *charset_big5;
4310   Lisp_Object attrs, charset_list, val;
4311   int char_offset = coding->produced_char;
4312   int last_offset = char_offset;
4313   int last_id = charset_ascii;
4314   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4315   int byte_after_cr = -1;
4316
4317   CODING_GET_INFO (coding, attrs, charset_list);
4318   val = charset_list;
4319   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4320   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4321
4322   while (1)
4323     {
4324       int c, c1;
4325       struct charset *charset;
4326
4327       src_base = src;
4328       consumed_chars_base = consumed_chars;
4329
4330       if (charbuf >= charbuf_end)
4331         break;
4332
4333       if (byte_after_cr >= 0)
4334         c = byte_after_cr, byte_after_cr = -1;
4335       else
4336         ONE_MORE_BYTE (c);
4337
4338       if (c < 0)
4339         goto invalid_code;
4340       if (c < 0x80)
4341         {
4342           if (eol_crlf && c == '\r')
4343             ONE_MORE_BYTE (byte_after_cr);
4344           charset = charset_roman;
4345         }
4346       else
4347         {
4348           /* BIG5 -> Big5 */
4349           if (c < 0xA1 || c > 0xFE)
4350             goto invalid_code;
4351           ONE_MORE_BYTE (c1);
4352           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4353             goto invalid_code;
4354           c = c << 8 | c1;
4355           charset = charset_big5;
4356         }
4357       if (charset->id != charset_ascii
4358           && last_id != charset->id)
4359         {
4360           if (last_id != charset_ascii)
4361             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4362           last_id = charset->id;
4363           last_offset = char_offset;
4364         }
4365       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4366       *charbuf++ = c;
4367       char_offset++;
4368       continue;
4369
4370     invalid_code:
4371       src = src_base;
4372       consumed_chars = consumed_chars_base;
4373       ONE_MORE_BYTE (c);
4374       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4375       char_offset++;
4376       coding->errors++;
4377     }
4378
4379  no_more_source:
4380   if (last_id != charset_ascii)
4381     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4382   coding->consumed_char += consumed_chars_base;
4383   coding->consumed = src_base - coding->source;
4384   coding->charbuf_used = charbuf - coding->charbuf;
4385 }
4386
4387 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4388    This function can encode charsets `ascii', `katakana-jisx0201',
4389    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4390    are sure that all these charsets are registered as official charset
4391    (i.e. do not have extended leading-codes).  Characters of other
4392    charsets are produced without any encoding.  If SJIS_P is 1, encode
4393    SJIS text, else encode BIG5 text.  */
4394
4395 static int
4396 encode_coding_sjis (coding)
4397      struct coding_system *coding;
4398 {
4399   int multibytep = coding->dst_multibyte;
4400   int *charbuf = coding->charbuf;
4401   int *charbuf_end = charbuf + coding->charbuf_used;
4402   unsigned char *dst = coding->destination + coding->produced;
4403   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4404   int safe_room = 4;
4405   int produced_chars = 0;
4406   Lisp_Object attrs, charset_list, val;
4407   int ascii_compatible;
4408   struct charset *charset_roman, *charset_kanji, *charset_kana;
4409   struct charset *charset_kanji2;
4410   int c;
4411
4412   CODING_GET_INFO (coding, attrs, charset_list);
4413   val = charset_list;
4414   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4415   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4416   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4417   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4418
4419   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4420
4421   while (charbuf < charbuf_end)
4422     {
4423       ASSURE_DESTINATION (safe_room);
4424       c = *charbuf++;
4425       /* Now encode the character C.  */
4426       if (ASCII_CHAR_P (c) && ascii_compatible)
4427         EMIT_ONE_ASCII_BYTE (c);
4428       else if (CHAR_BYTE8_P (c))
4429         {
4430           c = CHAR_TO_BYTE8 (c);
4431           EMIT_ONE_BYTE (c);
4432         }
4433       else
4434         {
4435           unsigned code;
4436           struct charset *charset = char_charset (c, charset_list, &code);
4437
4438           if (!charset)
4439             {
4440               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4441                 {
4442                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4443                   charset = CHARSET_FROM_ID (charset_ascii);
4444                 }
4445               else
4446                 {
4447                   c = coding->default_char;
4448                   charset = char_charset (c, charset_list, &code);
4449                 }
4450             }
4451           if (code == CHARSET_INVALID_CODE (charset))
4452             abort ();
4453           if (charset == charset_kanji)
4454             {
4455               int c1, c2;
4456               JIS_TO_SJIS (code);
4457               c1 = code >> 8, c2 = code & 0xFF;
4458               EMIT_TWO_BYTES (c1, c2);
4459             }
4460           else if (charset == charset_kana)
4461             EMIT_ONE_BYTE (code | 0x80);
4462           else if (charset_kanji2 && charset == charset_kanji2)
4463             {
4464               int c1, c2;
4465
4466               c1 = code >> 8;
4467               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4468                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4469                 {
4470                   JIS_TO_SJIS2 (code);
4471                   c1 = code >> 8, c2 = code & 0xFF;
4472                   EMIT_TWO_BYTES (c1, c2);
4473                 }
4474               else
4475                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4476             }
4477           else
4478             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4479         }
4480     }
4481   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4482   coding->produced_char += produced_chars;
4483   coding->produced = dst - coding->destination;
4484   return 0;
4485 }
4486
4487 static int
4488 encode_coding_big5 (coding)
4489      struct coding_system *coding;
4490 {
4491   int multibytep = coding->dst_multibyte;
4492   int *charbuf = coding->charbuf;
4493   int *charbuf_end = charbuf + coding->charbuf_used;
4494   unsigned char *dst = coding->destination + coding->produced;
4495   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4496   int safe_room = 4;
4497   int produced_chars = 0;
4498   Lisp_Object attrs, charset_list, val;
4499   int ascii_compatible;
4500   struct charset *charset_roman, *charset_big5;
4501   int c;
4502
4503   CODING_GET_INFO (coding, attrs, charset_list);
4504   val = charset_list;
4505   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4506   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4507   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4508
4509   while (charbuf < charbuf_end)
4510     {
4511       ASSURE_DESTINATION (safe_room);
4512       c = *charbuf++;
4513       /* Now encode the character C.  */
4514       if (ASCII_CHAR_P (c) && ascii_compatible)
4515         EMIT_ONE_ASCII_BYTE (c);
4516       else if (CHAR_BYTE8_P (c))
4517         {
4518           c = CHAR_TO_BYTE8 (c);
4519           EMIT_ONE_BYTE (c);
4520         }
4521       else
4522         {
4523           unsigned code;
4524           struct charset *charset = char_charset (c, charset_list, &code);
4525
4526           if (! charset)
4527             {
4528               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4529                 {
4530                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4531                   charset = CHARSET_FROM_ID (charset_ascii);
4532                 }
4533               else
4534                 {
4535                   c = coding->default_char;
4536                   charset = char_charset (c, charset_list, &code);
4537                 }
4538             }
4539           if (code == CHARSET_INVALID_CODE (charset))
4540             abort ();
4541           if (charset == charset_big5)
4542             {
4543               int c1, c2;
4544
4545               c1 = code >> 8, c2 = code & 0xFF;
4546               EMIT_TWO_BYTES (c1, c2);
4547             }
4548           else
4549             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4550         }
4551     }
4552   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4553   coding->produced_char += produced_chars;
4554   coding->produced = dst - coding->destination;
4555   return 0;
4556 }
4557
4558 \f
4559 /*** 10. CCL handlers ***/
4560
4561 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4562    Check if a text is encoded in a coding system of which
4563    encoder/decoder are written in CCL program.  If it is, return
4564    CATEGORY_MASK_CCL, else return 0.  */
4565
4566 static int
4567 detect_coding_ccl (coding, detect_info)
4568      struct coding_system *coding;
4569      struct coding_detection_info *detect_info;
4570 {
4571   const unsigned char *src = coding->source, *src_base;
4572   const unsigned char *src_end = coding->source + coding->src_bytes;
4573   int multibytep = coding->src_multibyte;
4574   int consumed_chars = 0;
4575   int found = 0;
4576   unsigned char *valids;
4577   int head_ascii = coding->head_ascii;
4578   Lisp_Object attrs;
4579
4580   detect_info->checked |= CATEGORY_MASK_CCL;
4581
4582   coding = &coding_categories[coding_category_ccl];
4583   valids = CODING_CCL_VALIDS (coding);
4584   attrs = CODING_ID_ATTRS (coding->id);
4585   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4586     src += head_ascii;
4587
4588   while (1)
4589     {
4590       int c;
4591
4592       src_base = src;
4593       ONE_MORE_BYTE (c);
4594       if (c < 0 || ! valids[c])
4595         break;
4596       if ((valids[c] > 1))
4597         found = CATEGORY_MASK_CCL;
4598     }
4599   detect_info->rejected |= CATEGORY_MASK_CCL;
4600   return 0;
4601
4602  no_more_source:
4603   detect_info->found |= found;
4604   return 1;
4605 }
4606
4607 static void
4608 decode_coding_ccl (coding)
4609      struct coding_system *coding;
4610 {
4611   const unsigned char *src = coding->source + coding->consumed;
4612   const unsigned char *src_end = coding->source + coding->src_bytes;
4613   int *charbuf = coding->charbuf + coding->charbuf_used;
4614   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4615   int consumed_chars = 0;
4616   int multibytep = coding->src_multibyte;
4617   struct ccl_program ccl;
4618   int source_charbuf[1024];
4619   int source_byteidx[1024];
4620   Lisp_Object attrs, charset_list;
4621
4622   CODING_GET_INFO (coding, attrs, charset_list);
4623   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4624
4625   while (src < src_end)
4626     {
4627       const unsigned char *p = src;
4628       int *source, *source_end;
4629       int i = 0;
4630
4631       if (multibytep)
4632         while (i < 1024 && p < src_end)
4633           {
4634             source_byteidx[i] = p - src;
4635             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4636           }
4637       else
4638         while (i < 1024 && p < src_end)
4639           source_charbuf[i++] = *p++;
4640
4641       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4642         ccl.last_block = 1;
4643
4644       source = source_charbuf;
4645       source_end = source + i;
4646       while (source < source_end)
4647         {
4648           ccl_driver (&ccl, source, charbuf,
4649                       source_end - source, charbuf_end - charbuf,
4650                       charset_list);
4651           source += ccl.consumed;
4652           charbuf += ccl.produced;
4653           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4654             break;
4655         }
4656       if (source < source_end)
4657         src += source_byteidx[source - source_charbuf];
4658       else
4659         src = p;
4660       consumed_chars += source - source_charbuf;
4661
4662       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4663           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4664         break;
4665     }
4666
4667   switch (ccl.status)
4668     {
4669     case CCL_STAT_SUSPEND_BY_SRC:
4670       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4671       break;
4672     case CCL_STAT_SUSPEND_BY_DST:
4673       break;
4674     case CCL_STAT_QUIT:
4675     case CCL_STAT_INVALID_CMD:
4676       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4677       break;
4678     default:
4679       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4680       break;
4681     }
4682   coding->consumed_char += consumed_chars;
4683   coding->consumed = src - coding->source;
4684   coding->charbuf_used = charbuf - coding->charbuf;
4685 }
4686
4687 static int
4688 encode_coding_ccl (coding)
4689      struct coding_system *coding;
4690 {
4691   struct ccl_program ccl;
4692   int multibytep = coding->dst_multibyte;
4693   int *charbuf = coding->charbuf;
4694   int *charbuf_end = charbuf + coding->charbuf_used;
4695   unsigned char *dst = coding->destination + coding->produced;
4696   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4697   int destination_charbuf[1024];
4698   int i, produced_chars = 0;
4699   Lisp_Object attrs, charset_list;
4700
4701   CODING_GET_INFO (coding, attrs, charset_list);
4702   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4703
4704   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4705   ccl.dst_multibyte = coding->dst_multibyte;
4706
4707   while (charbuf < charbuf_end)
4708     {
4709       ccl_driver (&ccl, charbuf, destination_charbuf,
4710                   charbuf_end - charbuf, 1024, charset_list);
4711       if (multibytep)
4712         {
4713           ASSURE_DESTINATION (ccl.produced * 2);
4714           for (i = 0; i < ccl.produced; i++)
4715             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4716         }
4717       else
4718         {
4719           ASSURE_DESTINATION (ccl.produced);
4720           for (i = 0; i < ccl.produced; i++)
4721             *dst++ = destination_charbuf[i] & 0xFF;
4722           produced_chars += ccl.produced;
4723         }
4724       charbuf += ccl.consumed;
4725       if (ccl.status == CCL_STAT_QUIT
4726           || ccl.status == CCL_STAT_INVALID_CMD)
4727         break;
4728     }
4729
4730   switch (ccl.status)
4731     {
4732     case CCL_STAT_SUSPEND_BY_SRC:
4733       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4734       break;
4735     case CCL_STAT_SUSPEND_BY_DST:
4736       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4737       break;
4738     case CCL_STAT_QUIT:
4739     case CCL_STAT_INVALID_CMD:
4740       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4741       break;
4742     default:
4743       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4744       break;
4745     }
4746
4747   coding->produced_char += produced_chars;
4748   coding->produced = dst - coding->destination;
4749   return 0;
4750 }
4751
4752
4753 \f
4754 /*** 10, 11. no-conversion handlers ***/
4755
4756 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4757
4758 static void
4759 decode_coding_raw_text (coding)
4760      struct coding_system *coding;
4761 {
4762   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4763
4764   coding->chars_at_source = 1;
4765   coding->consumed_char = coding->src_chars;
4766   coding->consumed = coding->src_bytes;
4767   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4768     {
4769       coding->consumed_char--;
4770       coding->consumed--;
4771       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4772     }
4773   else
4774     record_conversion_result (coding, CODING_RESULT_SUCCESS);
4775 }
4776
4777 static int
4778 encode_coding_raw_text (coding)
4779      struct coding_system *coding;
4780 {
4781   int multibytep = coding->dst_multibyte;
4782   int *charbuf = coding->charbuf;
4783   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4784   unsigned char *dst = coding->destination + coding->produced;
4785   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4786   int produced_chars = 0;
4787   int c;
4788
4789   if (multibytep)
4790     {
4791       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4792
4793       if (coding->src_multibyte)
4794         while (charbuf < charbuf_end)
4795           {
4796             ASSURE_DESTINATION (safe_room);
4797             c = *charbuf++;
4798             if (ASCII_CHAR_P (c))
4799               EMIT_ONE_ASCII_BYTE (c);
4800             else if (CHAR_BYTE8_P (c))
4801               {
4802                 c = CHAR_TO_BYTE8 (c);
4803                 EMIT_ONE_BYTE (c);
4804               }
4805             else
4806               {
4807                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4808
4809                 CHAR_STRING_ADVANCE (c, p1);
4810                 while (p0 < p1)
4811                   {
4812                     EMIT_ONE_BYTE (*p0);
4813                     p0++;
4814                   }
4815               }
4816           }
4817       else
4818         while (charbuf < charbuf_end)
4819           {
4820             ASSURE_DESTINATION (safe_room);
4821             c = *charbuf++;
4822             EMIT_ONE_BYTE (c);
4823           }
4824     }
4825   else
4826     {
4827       if (coding->src_multibyte)
4828         {
4829           int safe_room = MAX_MULTIBYTE_LENGTH;
4830
4831           while (charbuf < charbuf_end)
4832             {
4833               ASSURE_DESTINATION (safe_room);
4834               c = *charbuf++;
4835               if (ASCII_CHAR_P (c))
4836                 *dst++ = c;
4837               else if (CHAR_BYTE8_P (c))
4838                 *dst++ = CHAR_TO_BYTE8 (c);
4839               else
4840                 CHAR_STRING_ADVANCE (c, dst);
4841               produced_chars++;
4842             }
4843         }
4844       else
4845         {
4846           ASSURE_DESTINATION (charbuf_end - charbuf);
4847           while (charbuf < charbuf_end && dst < dst_end)
4848             *dst++ = *charbuf++;
4849           produced_chars = dst - (coding->destination + coding->dst_bytes);
4850         }
4851     }
4852   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4853   coding->produced_char += produced_chars;
4854   coding->produced = dst - coding->destination;
4855   return 0;
4856 }
4857
4858 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4859    Check if a text is encoded in a charset-based coding system.  If it
4860    is, return 1, else return 0.  */
4861
4862 static int
4863 detect_coding_charset (coding, detect_info)
4864      struct coding_system *coding;
4865      struct coding_detection_info *detect_info;
4866 {
4867   const unsigned char *src = coding->source, *src_base;
4868   const unsigned char *src_end = coding->source + coding->src_bytes;
4869   int multibytep = coding->src_multibyte;
4870   int consumed_chars = 0;
4871   Lisp_Object attrs, valids;
4872   int found = 0;
4873   int head_ascii = coding->head_ascii;
4874
4875   detect_info->checked |= CATEGORY_MASK_CHARSET;
4876
4877   coding = &coding_categories[coding_category_charset];
4878   attrs = CODING_ID_ATTRS (coding->id);
4879   valids = AREF (attrs, coding_attr_charset_valids);
4880
4881   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4882     src += head_ascii;
4883
4884   while (1)
4885     {
4886       int c;
4887       Lisp_Object val;
4888       struct charset *charset;
4889       int dim, idx;
4890
4891       src_base = src;
4892       ONE_MORE_BYTE (c);
4893       if (c < 0)
4894         continue;
4895       val = AREF (valids, c);
4896       if (NILP (val))
4897         break;
4898       if (c >= 0x80)
4899         found = CATEGORY_MASK_CHARSET;
4900       if (INTEGERP (val))
4901         {
4902           charset = CHARSET_FROM_ID (XFASTINT (val));
4903           dim = CHARSET_DIMENSION (charset);
4904           for (idx = 1; idx < dim; idx++)
4905             {
4906               if (src == src_end)
4907                 goto too_short;
4908               ONE_MORE_BYTE (c);
4909               if (c < charset->code_space[(dim - 1 - idx) * 2]
4910                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
4911                 break;
4912             }
4913           if (idx < dim)
4914             break;
4915         }
4916       else
4917         {
4918           idx = 1;
4919           for (; CONSP (val); val = XCDR (val))
4920             {
4921               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4922               dim = CHARSET_DIMENSION (charset);
4923               while (idx < dim)
4924                 {
4925                   if (src == src_end)
4926                     goto too_short;
4927                   ONE_MORE_BYTE (c);
4928                   if (c < charset->code_space[(dim - 1 - idx) * 4]
4929                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
4930                     break;
4931                   idx++;
4932                 }
4933               if (idx == dim)
4934                 {
4935                   val = Qnil;
4936                   break;
4937                 }
4938             }
4939           if (CONSP (val))
4940             break;
4941         }
4942     }
4943  too_short:
4944   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4945   return 0;
4946
4947  no_more_source:
4948   detect_info->found |= found;
4949   return 1;
4950 }
4951
4952 static void
4953 decode_coding_charset (coding)
4954      struct coding_system *coding;
4955 {
4956   const unsigned char *src = coding->source + coding->consumed;
4957   const unsigned char *src_end = coding->source + coding->src_bytes;
4958   const unsigned char *src_base;
4959   int *charbuf = coding->charbuf + coding->charbuf_used;
4960   int *charbuf_end
4961     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4962   int consumed_chars = 0, consumed_chars_base;
4963   int multibytep = coding->src_multibyte;
4964   Lisp_Object attrs, charset_list, valids;
4965   int char_offset = coding->produced_char;
4966   int last_offset = char_offset;
4967   int last_id = charset_ascii;
4968   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4969   int byte_after_cr = -1;
4970
4971   CODING_GET_INFO (coding, attrs, charset_list);
4972   valids = AREF (attrs, coding_attr_charset_valids);
4973
4974   while (1)
4975     {
4976       int c;
4977       Lisp_Object val;
4978       struct charset *charset;
4979       int dim;
4980       int len = 1;
4981       unsigned code;
4982
4983       src_base = src;
4984       consumed_chars_base = consumed_chars;
4985
4986       if (charbuf >= charbuf_end)
4987         break;
4988
4989       if (byte_after_cr >= 0)
4990         {
4991           c = byte_after_cr;
4992           byte_after_cr = -1;
4993         }
4994       else
4995         {
4996           ONE_MORE_BYTE (c);
4997           if (eol_crlf && c == '\r')
4998             ONE_MORE_BYTE (byte_after_cr);
4999         }
5000       if (c < 0)
5001         goto invalid_code;
5002       code = c;
5003
5004       val = AREF (valids, c);
5005       if (NILP (val))
5006         goto invalid_code;
5007       if (INTEGERP (val))
5008         {
5009           charset = CHARSET_FROM_ID (XFASTINT (val));
5010           dim = CHARSET_DIMENSION (charset);
5011           while (len < dim)
5012             {
5013               ONE_MORE_BYTE (c);
5014               code = (code << 8) | c;
5015               len++;
5016             }
5017           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5018                               charset, code, c);
5019         }
5020       else
5021         {
5022           /* VAL is a list of charset IDs.  It is assured that the
5023              list is sorted by charset dimensions (smaller one
5024              comes first).  */
5025           while (CONSP (val))
5026             {
5027               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5028               dim = CHARSET_DIMENSION (charset);
5029               while (len < dim)
5030                 {
5031                   ONE_MORE_BYTE (c);
5032                   code = (code << 8) | c;
5033                   len++;
5034                 }
5035               CODING_DECODE_CHAR (coding, src, src_base,
5036                                   src_end, charset, code, c);
5037               if (c >= 0)
5038                 break;
5039               val = XCDR (val);
5040             }
5041         }
5042       if (c < 0)
5043         goto invalid_code;
5044       if (charset->id != charset_ascii
5045           && last_id != charset->id)
5046         {
5047           if (last_id != charset_ascii)
5048             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5049           last_id = charset->id;
5050           last_offset = char_offset;
5051         }
5052
5053       *charbuf++ = c;
5054       char_offset++;
5055       continue;
5056
5057     invalid_code:
5058       src = src_base;
5059       consumed_chars = consumed_chars_base;
5060       ONE_MORE_BYTE (c);
5061       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5062       char_offset++;
5063       coding->errors++;
5064     }
5065
5066  no_more_source:
5067   if (last_id != charset_ascii)
5068     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5069   coding->consumed_char += consumed_chars_base;
5070   coding->consumed = src_base - coding->source;
5071   coding->charbuf_used = charbuf - coding->charbuf;
5072 }
5073
5074 static int
5075 encode_coding_charset (coding)
5076      struct coding_system *coding;
5077 {
5078   int multibytep = coding->dst_multibyte;
5079   int *charbuf = coding->charbuf;
5080   int *charbuf_end = charbuf + coding->charbuf_used;
5081   unsigned char *dst = coding->destination + coding->produced;
5082   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5083   int safe_room = MAX_MULTIBYTE_LENGTH;
5084   int produced_chars = 0;
5085   Lisp_Object attrs, charset_list;
5086   int ascii_compatible;
5087   int c;
5088
5089   CODING_GET_INFO (coding, attrs, charset_list);
5090   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5091
5092   while (charbuf < charbuf_end)
5093     {
5094       struct charset *charset;
5095       unsigned code;
5096
5097       ASSURE_DESTINATION (safe_room);
5098       c = *charbuf++;
5099       if (ascii_compatible && ASCII_CHAR_P (c))
5100         EMIT_ONE_ASCII_BYTE (c);
5101       else if (CHAR_BYTE8_P (c))
5102         {
5103           c = CHAR_TO_BYTE8 (c);
5104           EMIT_ONE_BYTE (c);
5105         }
5106       else
5107         {
5108           charset = char_charset (c, charset_list, &code);
5109           if (charset)
5110             {
5111               if (CHARSET_DIMENSION (charset) == 1)
5112                 EMIT_ONE_BYTE (code);
5113               else if (CHARSET_DIMENSION (charset) == 2)
5114                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5115               else if (CHARSET_DIMENSION (charset) == 3)
5116                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5117               else
5118                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5119                                  (code >> 8) & 0xFF, code & 0xFF);
5120             }
5121           else
5122             {
5123               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5124                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5125               else
5126                 c = coding->default_char;
5127               EMIT_ONE_BYTE (c);
5128             }
5129         }
5130     }
5131
5132   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5133   coding->produced_char += produced_chars;
5134   coding->produced = dst - coding->destination;
5135   return 0;
5136 }
5137
5138 \f
5139 /*** 7. C library functions ***/
5140
5141 /* Setup coding context CODING from information about CODING_SYSTEM.
5142    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5143    CODING_SYSTEM is invalid, signal an error.  */
5144
5145 void
5146 setup_coding_system (coding_system, coding)
5147      Lisp_Object coding_system;
5148      struct coding_system *coding;
5149 {
5150   Lisp_Object attrs;
5151   Lisp_Object eol_type;
5152   Lisp_Object coding_type;
5153   Lisp_Object val;
5154
5155   if (NILP (coding_system))
5156     coding_system = Qundecided;
5157
5158   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5159
5160   attrs = CODING_ID_ATTRS (coding->id);
5161   eol_type = CODING_ID_EOL_TYPE (coding->id);
5162
5163   coding->mode = 0;
5164   coding->head_ascii = -1;
5165   if (VECTORP (eol_type))
5166     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5167                             | CODING_REQUIRE_DETECTION_MASK);
5168   else if (! EQ (eol_type, Qunix))
5169     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5170                             | CODING_REQUIRE_ENCODING_MASK);
5171   else
5172     coding->common_flags = 0;
5173   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5174     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5175   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5176     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5177   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5178     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5179
5180   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5181   coding->max_charset_id = SCHARS (val) - 1;
5182   coding->safe_charsets = (char *) SDATA (val);
5183   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5184
5185   coding_type = CODING_ATTR_TYPE (attrs);
5186   if (EQ (coding_type, Qundecided))
5187     {
5188       coding->detector = NULL;
5189       coding->decoder = decode_coding_raw_text;
5190       coding->encoder = encode_coding_raw_text;
5191       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5192     }
5193   else if (EQ (coding_type, Qiso_2022))
5194     {
5195       int i;
5196       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5197
5198       /* Invoke graphic register 0 to plane 0.  */
5199       CODING_ISO_INVOCATION (coding, 0) = 0;
5200       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5201       CODING_ISO_INVOCATION (coding, 1)
5202         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5203       /* Setup the initial status of designation.  */
5204       for (i = 0; i < 4; i++)
5205         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5206       /* Not single shifting initially.  */
5207       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5208       /* Beginning of buffer should also be regarded as bol. */
5209       CODING_ISO_BOL (coding) = 1;
5210       coding->detector = detect_coding_iso_2022;
5211       coding->decoder = decode_coding_iso_2022;
5212       coding->encoder = encode_coding_iso_2022;
5213       if (flags & CODING_ISO_FLAG_SAFE)
5214         coding->mode |= CODING_MODE_SAFE_ENCODING;
5215       coding->common_flags
5216         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5217             | CODING_REQUIRE_FLUSHING_MASK);
5218       if (flags & CODING_ISO_FLAG_COMPOSITION)
5219         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5220       if (flags & CODING_ISO_FLAG_DESIGNATION)
5221         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5222       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5223         {
5224           setup_iso_safe_charsets (attrs);
5225           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5226           coding->max_charset_id = SCHARS (val) - 1;
5227           coding->safe_charsets = (char *) SDATA (val);
5228         }
5229       CODING_ISO_FLAGS (coding) = flags;
5230     }
5231   else if (EQ (coding_type, Qcharset))
5232     {
5233       coding->detector = detect_coding_charset;
5234       coding->decoder = decode_coding_charset;
5235       coding->encoder = encode_coding_charset;
5236       coding->common_flags
5237         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5238     }
5239   else if (EQ (coding_type, Qutf_8))
5240     {
5241       coding->detector = detect_coding_utf_8;
5242       coding->decoder = decode_coding_utf_8;
5243       coding->encoder = encode_coding_utf_8;
5244       coding->common_flags
5245         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5246     }
5247   else if (EQ (coding_type, Qutf_16))
5248     {
5249       val = AREF (attrs, coding_attr_utf_16_bom);
5250       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5251                                     : EQ (val, Qt) ? utf_16_with_bom
5252                                     : utf_16_without_bom);
5253       val = AREF (attrs, coding_attr_utf_16_endian);
5254       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5255                                        : utf_16_little_endian);
5256       CODING_UTF_16_SURROGATE (coding) = 0;
5257       coding->detector = detect_coding_utf_16;
5258       coding->decoder = decode_coding_utf_16;
5259       coding->encoder = encode_coding_utf_16;
5260       coding->common_flags
5261         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5262       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5263         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5264     }
5265   else if (EQ (coding_type, Qccl))
5266     {
5267       coding->detector = detect_coding_ccl;
5268       coding->decoder = decode_coding_ccl;
5269       coding->encoder = encode_coding_ccl;
5270       coding->common_flags
5271         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5272             | CODING_REQUIRE_FLUSHING_MASK);
5273     }
5274   else if (EQ (coding_type, Qemacs_mule))
5275     {
5276       coding->detector = detect_coding_emacs_mule;
5277       coding->decoder = decode_coding_emacs_mule;
5278       coding->encoder = encode_coding_emacs_mule;
5279       coding->common_flags
5280         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5281       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5282           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5283         {
5284           Lisp_Object tail, safe_charsets;
5285           int max_charset_id = 0;
5286
5287           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5288                tail = XCDR (tail))
5289             if (max_charset_id < XFASTINT (XCAR (tail)))
5290               max_charset_id = XFASTINT (XCAR (tail));
5291           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5292                                         make_number (255));
5293           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5294                tail = XCDR (tail))
5295             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5296           coding->max_charset_id = max_charset_id;
5297           coding->safe_charsets = (char *) SDATA (safe_charsets);
5298         }
5299     }
5300   else if (EQ (coding_type, Qshift_jis))
5301     {
5302       coding->detector = detect_coding_sjis;
5303       coding->decoder = decode_coding_sjis;
5304       coding->encoder = encode_coding_sjis;
5305       coding->common_flags
5306         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5307     }
5308   else if (EQ (coding_type, Qbig5))
5309     {
5310       coding->detector = detect_coding_big5;
5311       coding->decoder = decode_coding_big5;
5312       coding->encoder = encode_coding_big5;
5313       coding->common_flags
5314         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5315     }
5316   else                          /* EQ (coding_type, Qraw_text) */
5317     {
5318       coding->detector = NULL;
5319       coding->decoder = decode_coding_raw_text;
5320       coding->encoder = encode_coding_raw_text;
5321       if (! EQ (eol_type, Qunix))
5322         {
5323           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5324           if (! VECTORP (eol_type))
5325             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5326         }
5327
5328     }
5329
5330   return;
5331 }
5332
5333 /* Return a list of charsets supported by CODING.  */
5334
5335 Lisp_Object
5336 coding_charset_list (coding)
5337      struct coding_system *coding;
5338 {
5339   Lisp_Object attrs, charset_list;
5340
5341   CODING_GET_INFO (coding, attrs, charset_list);
5342   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5343     {
5344       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5345
5346       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5347         charset_list = Viso_2022_charset_list;
5348     }
5349   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5350     {
5351       charset_list = Vemacs_mule_charset_list;
5352     }
5353   return charset_list;
5354 }
5355
5356
5357 /* Return raw-text or one of its subsidiaries that has the same
5358    eol_type as CODING-SYSTEM.  */
5359
5360 Lisp_Object
5361 raw_text_coding_system (coding_system)
5362      Lisp_Object coding_system;
5363 {
5364   Lisp_Object spec, attrs;
5365   Lisp_Object eol_type, raw_text_eol_type;
5366
5367   if (NILP (coding_system))
5368     return Qraw_text;
5369   spec = CODING_SYSTEM_SPEC (coding_system);
5370   attrs = AREF (spec, 0);
5371
5372   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5373     return coding_system;
5374
5375   eol_type = AREF (spec, 2);
5376   if (VECTORP (eol_type))
5377     return Qraw_text;
5378   spec = CODING_SYSTEM_SPEC (Qraw_text);
5379   raw_text_eol_type = AREF (spec, 2);
5380   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5381           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5382           : AREF (raw_text_eol_type, 2));
5383 }
5384
5385
5386 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5387    does, return one of the subsidiary that has the same eol-spec as
5388    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
5389    inherit end-of-line format from the system's setting
5390    (system_eol_type).  */
5391
5392 Lisp_Object
5393 coding_inherit_eol_type (coding_system, parent)
5394      Lisp_Object coding_system, parent;
5395 {
5396   Lisp_Object spec, eol_type;
5397
5398   if (NILP (coding_system))
5399     coding_system = Qraw_text;
5400   spec = CODING_SYSTEM_SPEC (coding_system);
5401   eol_type = AREF (spec, 2);
5402   if (VECTORP (eol_type))
5403     {
5404       Lisp_Object parent_eol_type;
5405
5406       if (! NILP (parent))
5407         {
5408           Lisp_Object parent_spec;
5409
5410           parent_spec = CODING_SYSTEM_SPEC (parent);
5411           parent_eol_type = AREF (parent_spec, 2);
5412         }
5413       else
5414         parent_eol_type = system_eol_type;
5415       if (EQ (parent_eol_type, Qunix))
5416         coding_system = AREF (eol_type, 0);
5417       else if (EQ (parent_eol_type, Qdos))
5418         coding_system = AREF (eol_type, 1);
5419       else if (EQ (parent_eol_type, Qmac))
5420         coding_system = AREF (eol_type, 2);
5421     }
5422   return coding_system;
5423 }
5424
5425 /* Emacs has a mechanism to automatically detect a coding system if it
5426    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5427    it's impossible to distinguish some coding systems accurately
5428    because they use the same range of codes.  So, at first, coding
5429    systems are categorized into 7, those are:
5430
5431    o coding-category-emacs-mule
5432
5433         The category for a coding system which has the same code range
5434         as Emacs' internal format.  Assigned the coding-system (Lisp
5435         symbol) `emacs-mule' by default.
5436
5437    o coding-category-sjis
5438
5439         The category for a coding system which has the same code range
5440         as SJIS.  Assigned the coding-system (Lisp
5441         symbol) `japanese-shift-jis' by default.
5442
5443    o coding-category-iso-7
5444
5445         The category for a coding system which has the same code range
5446         as ISO2022 of 7-bit environment.  This doesn't use any locking
5447         shift and single shift functions.  This can encode/decode all
5448         charsets.  Assigned the coding-system (Lisp symbol)
5449         `iso-2022-7bit' by default.
5450
5451    o coding-category-iso-7-tight
5452
5453         Same as coding-category-iso-7 except that this can
5454         encode/decode only the specified charsets.
5455
5456    o coding-category-iso-8-1
5457
5458         The category for a coding system which has the same code range
5459         as ISO2022 of 8-bit environment and graphic plane 1 used only
5460         for DIMENSION1 charset.  This doesn't use any locking shift
5461         and single shift functions.  Assigned the coding-system (Lisp
5462         symbol) `iso-latin-1' by default.
5463
5464    o coding-category-iso-8-2
5465
5466         The category for a coding system which has the same code range
5467         as ISO2022 of 8-bit environment and graphic plane 1 used only
5468         for DIMENSION2 charset.  This doesn't use any locking shift
5469         and single shift functions.  Assigned the coding-system (Lisp
5470         symbol) `japanese-iso-8bit' by default.
5471
5472    o coding-category-iso-7-else
5473
5474         The category for a coding system which has the same code range
5475         as ISO2022 of 7-bit environemnt but uses locking shift or
5476         single shift functions.  Assigned the coding-system (Lisp
5477         symbol) `iso-2022-7bit-lock' by default.
5478
5479    o coding-category-iso-8-else
5480
5481         The category for a coding system which has the same code range
5482         as ISO2022 of 8-bit environemnt but uses locking shift or
5483         single shift functions.  Assigned the coding-system (Lisp
5484         symbol) `iso-2022-8bit-ss2' by default.
5485
5486    o coding-category-big5
5487
5488         The category for a coding system which has the same code range
5489         as BIG5.  Assigned the coding-system (Lisp symbol)
5490         `cn-big5' by default.
5491
5492    o coding-category-utf-8
5493
5494         The category for a coding system which has the same code range
5495         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5496         symbol) `utf-8' by default.
5497
5498    o coding-category-utf-16-be
5499
5500         The category for a coding system in which a text has an
5501         Unicode signature (cf. Unicode Standard) in the order of BIG
5502         endian at the head.  Assigned the coding-system (Lisp symbol)
5503         `utf-16-be' by default.
5504
5505    o coding-category-utf-16-le
5506
5507         The category for a coding system in which a text has an
5508         Unicode signature (cf. Unicode Standard) in the order of
5509         LITTLE endian at the head.  Assigned the coding-system (Lisp
5510         symbol) `utf-16-le' by default.
5511
5512    o coding-category-ccl
5513
5514         The category for a coding system of which encoder/decoder is
5515         written in CCL programs.  The default value is nil, i.e., no
5516         coding system is assigned.
5517
5518    o coding-category-binary
5519
5520         The category for a coding system not categorized in any of the
5521         above.  Assigned the coding-system (Lisp symbol)
5522         `no-conversion' by default.
5523
5524    Each of them is a Lisp symbol and the value is an actual
5525    `coding-system's (this is also a Lisp symbol) assigned by a user.
5526    What Emacs does actually is to detect a category of coding system.
5527    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5528    decide only one possible category, it selects a category of the
5529    highest priority.  Priorities of categories are also specified by a
5530    user in a Lisp variable `coding-category-list'.
5531
5532 */
5533
5534 #define EOL_SEEN_NONE   0
5535 #define EOL_SEEN_LF     1
5536 #define EOL_SEEN_CR     2
5537 #define EOL_SEEN_CRLF   4
5538
5539 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5540    SOURCE is encoded.  If CATEGORY is one of
5541    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5542    two-byte, else they are encoded by one-byte.
5543
5544    Return one of EOL_SEEN_XXX.  */
5545
5546 #define MAX_EOL_CHECK_COUNT 3
5547
5548 static int
5549 detect_eol (source, src_bytes, category)
5550      const unsigned char *source;
5551      EMACS_INT src_bytes;
5552      enum coding_category category;
5553 {
5554   const unsigned char *src = source, *src_end = src + src_bytes;
5555   unsigned char c;
5556   int total  = 0;
5557   int eol_seen = EOL_SEEN_NONE;
5558
5559   if ((1 << category) & CATEGORY_MASK_UTF_16)
5560     {
5561       int msb, lsb;
5562
5563       msb = category == (coding_category_utf_16_le
5564                          | coding_category_utf_16_le_nosig);
5565       lsb = 1 - msb;
5566
5567       while (src + 1 < src_end)
5568         {
5569           c = src[lsb];
5570           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5571             {
5572               int this_eol;
5573
5574               if (c == '\n')
5575                 this_eol = EOL_SEEN_LF;
5576               else if (src + 3 >= src_end
5577                        || src[msb + 2] != 0
5578                        || src[lsb + 2] != '\n')
5579                 this_eol = EOL_SEEN_CR;
5580               else
5581                 this_eol = EOL_SEEN_CRLF;
5582
5583               if (eol_seen == EOL_SEEN_NONE)
5584                 /* This is the first end-of-line.  */
5585                 eol_seen = this_eol;
5586               else if (eol_seen != this_eol)
5587                 {
5588                   /* The found type is different from what found before.  */
5589                   eol_seen = EOL_SEEN_LF;
5590                   break;
5591                 }
5592               if (++total == MAX_EOL_CHECK_COUNT)
5593                 break;
5594             }
5595           src += 2;
5596         }
5597     }
5598   else
5599     {
5600       while (src < src_end)
5601         {
5602           c = *src++;
5603           if (c == '\n' || c == '\r')
5604             {
5605               int this_eol;
5606
5607               if (c == '\n')
5608                 this_eol = EOL_SEEN_LF;
5609               else if (src >= src_end || *src != '\n')
5610                 this_eol = EOL_SEEN_CR;
5611               else
5612                 this_eol = EOL_SEEN_CRLF, src++;
5613
5614               if (eol_seen == EOL_SEEN_NONE)
5615                 /* This is the first end-of-line.  */
5616                 eol_seen = this_eol;
5617               else if (eol_seen != this_eol)
5618                 {
5619                   /* The found type is different from what found before.  */
5620                   eol_seen = EOL_SEEN_LF;
5621                   break;
5622                 }
5623               if (++total == MAX_EOL_CHECK_COUNT)
5624                 break;
5625             }
5626         }
5627     }
5628   return eol_seen;
5629 }
5630
5631
5632 static Lisp_Object
5633 adjust_coding_eol_type (coding, eol_seen)
5634      struct coding_system *coding;
5635      int eol_seen;
5636 {
5637   Lisp_Object eol_type;
5638
5639   eol_type = CODING_ID_EOL_TYPE (coding->id);
5640   if (eol_seen & EOL_SEEN_LF)
5641     {
5642       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5643       eol_type = Qunix;
5644     }
5645   else if (eol_seen & EOL_SEEN_CRLF)
5646     {
5647       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5648       eol_type = Qdos;
5649     }
5650   else if (eol_seen & EOL_SEEN_CR)
5651     {
5652       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5653       eol_type = Qmac;
5654     }
5655   return eol_type;
5656 }
5657
5658 /* Detect how a text specified in CODING is encoded.  If a coding
5659    system is detected, update fields of CODING by the detected coding
5660    system.  */
5661
5662 void
5663 detect_coding (coding)
5664      struct coding_system *coding;
5665 {
5666   const unsigned char *src, *src_end;
5667
5668   coding->consumed = coding->consumed_char = 0;
5669   coding->produced = coding->produced_char = 0;
5670   coding_set_source (coding);
5671
5672   src_end = coding->source + coding->src_bytes;
5673
5674   /* If we have not yet decided the text encoding type, detect it
5675      now.  */
5676   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5677     {
5678       int c, i;
5679       struct coding_detection_info detect_info;
5680
5681       detect_info.checked = detect_info.found = detect_info.rejected = 0;
5682       for (i = 0, src = coding->source; src < src_end; i++, src++)
5683         {
5684           c = *src;
5685           if (c & 0x80)
5686             break;
5687           if (c < 0x20
5688               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5689               && ! inhibit_iso_escape_detection
5690               && ! detect_info.checked)
5691             {
5692               coding->head_ascii = src - (coding->source + coding->consumed);
5693               if (detect_coding_iso_2022 (coding, &detect_info))
5694                 {
5695                   /* We have scanned the whole data.  */
5696                   if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5697                     /* We didn't find an 8-bit code.  */
5698                     src = src_end;
5699                   break;
5700                 }
5701             }
5702         }
5703       coding->head_ascii = src - (coding->source + coding->consumed);
5704
5705       if (coding->head_ascii < coding->src_bytes
5706           || detect_info.found)
5707         {
5708           enum coding_category category;
5709           struct coding_system *this;
5710
5711           if (coding->head_ascii == coding->src_bytes)
5712             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
5713             for (i = 0; i < coding_category_raw_text; i++)
5714               {
5715                 category = coding_priorities[i];
5716                 this = coding_categories + category;
5717                 if (detect_info.found & (1 << category))
5718                   break;
5719               }
5720           else
5721             for (i = 0; i < coding_category_raw_text; i++)
5722               {
5723                 category = coding_priorities[i];
5724                 this = coding_categories + category;
5725                 if (this->id < 0)
5726                   {
5727                     /* No coding system of this category is defined.  */
5728                     detect_info.rejected |= (1 << category);
5729                   }
5730                 else if (category >= coding_category_raw_text)
5731                   continue;
5732                 else if (detect_info.checked & (1 << category))
5733                   {
5734                     if (detect_info.found & (1 << category))
5735                       break;
5736                   }
5737                 else if ((*(this->detector)) (coding, &detect_info)
5738                          && detect_info.found & (1 << category))
5739                   {
5740                     if (category == coding_category_utf_16_auto)
5741                       {
5742                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5743                           category = coding_category_utf_16_le;
5744                         else
5745                           category = coding_category_utf_16_be;
5746                       }
5747                     break;
5748                   }
5749               }
5750
5751           if (i < coding_category_raw_text)
5752             setup_coding_system (CODING_ID_NAME (this->id), coding);
5753           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5754             setup_coding_system (Qraw_text, coding);
5755           else if (detect_info.rejected)
5756             for (i = 0; i < coding_category_raw_text; i++)
5757               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5758                 {
5759                   this = coding_categories + coding_priorities[i];
5760                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5761                   break;
5762                 }
5763         }
5764     }
5765   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5766            == coding_category_utf_16_auto)
5767     {
5768       Lisp_Object coding_systems;
5769       struct coding_detection_info detect_info;
5770
5771       coding_systems
5772         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5773       detect_info.found = detect_info.rejected = 0;
5774       if (CONSP (coding_systems)
5775           && detect_coding_utf_16 (coding, &detect_info))
5776         {
5777           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5778             setup_coding_system (XCAR (coding_systems), coding);
5779           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5780             setup_coding_system (XCDR (coding_systems), coding);
5781         }
5782     }
5783 }
5784
5785
5786 static void
5787 decode_eol (coding)
5788      struct coding_system *coding;
5789 {
5790   Lisp_Object eol_type;
5791   unsigned char *p, *pbeg, *pend;
5792
5793   eol_type = CODING_ID_EOL_TYPE (coding->id);
5794   if (EQ (eol_type, Qunix))
5795     return;
5796
5797   if (NILP (coding->dst_object))
5798     pbeg = coding->destination;
5799   else
5800     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5801   pend = pbeg + coding->produced;
5802
5803   if (VECTORP (eol_type))
5804     {
5805       int eol_seen = EOL_SEEN_NONE;
5806
5807       for (p = pbeg; p < pend; p++)
5808         {
5809           if (*p == '\n')
5810             eol_seen |= EOL_SEEN_LF;
5811           else if (*p == '\r')
5812             {
5813               if (p + 1 < pend && *(p + 1) == '\n')
5814                 {
5815                   eol_seen |= EOL_SEEN_CRLF;
5816                   p++;
5817                 }
5818               else
5819                 eol_seen |= EOL_SEEN_CR;
5820             }
5821         }
5822       if (eol_seen != EOL_SEEN_NONE
5823           && eol_seen != EOL_SEEN_LF
5824           && eol_seen != EOL_SEEN_CRLF
5825           && eol_seen != EOL_SEEN_CR)
5826         eol_seen = EOL_SEEN_LF;
5827       if (eol_seen != EOL_SEEN_NONE)
5828         eol_type = adjust_coding_eol_type (coding, eol_seen);
5829     }
5830
5831   if (EQ (eol_type, Qmac))
5832     {
5833       for (p = pbeg; p < pend; p++)
5834         if (*p == '\r')
5835           *p = '\n';
5836     }
5837   else if (EQ (eol_type, Qdos))
5838     {
5839       int n = 0;
5840
5841       if (NILP (coding->dst_object))
5842         {
5843           /* Start deleting '\r' from the tail to minimize the memory
5844              movement.  */
5845           for (p = pend - 2; p >= pbeg; p--)
5846             if (*p == '\r')
5847               {
5848                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5849                 n++;
5850               }
5851         }
5852       else
5853         {
5854           int pos_byte = coding->dst_pos_byte;
5855           int pos = coding->dst_pos;
5856           int pos_end = pos + coding->produced_char - 1;
5857
5858           while (pos < pos_end)
5859             {
5860               p = BYTE_POS_ADDR (pos_byte);
5861               if (*p == '\r' && p[1] == '\n')
5862                 {
5863                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5864                   n++;
5865                   pos_end--;
5866                 }
5867               pos++;
5868               if (coding->dst_multibyte)
5869                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
5870               else
5871                 pos_byte++;
5872             }
5873         }
5874       coding->produced -= n;
5875       coding->produced_char -= n;
5876     }
5877 }
5878
5879
5880 /* Return a translation table (or list of them) from coding system
5881    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5882    decoding (ENCODEP is zero). */
5883
5884 static Lisp_Object
5885 get_translation_table (attrs, encodep, max_lookup)
5886      Lisp_Object attrs;
5887      int encodep, *max_lookup;
5888 {
5889   Lisp_Object standard, translation_table;
5890   Lisp_Object val;
5891
5892   if (encodep)
5893     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5894       standard = Vstandard_translation_table_for_encode;
5895   else
5896     translation_table = CODING_ATTR_DECODE_TBL (attrs),
5897       standard = Vstandard_translation_table_for_decode;
5898   if (NILP (translation_table))
5899     translation_table = standard;
5900   else
5901     {
5902       if (SYMBOLP (translation_table))
5903         translation_table = Fget (translation_table, Qtranslation_table);
5904       else if (CONSP (translation_table))
5905         {
5906           translation_table = Fcopy_sequence (translation_table);
5907           for (val = translation_table; CONSP (val); val = XCDR (val))
5908             if (SYMBOLP (XCAR (val)))
5909               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5910         }
5911       if (CHAR_TABLE_P (standard))
5912         {
5913           if (CONSP (translation_table))
5914             translation_table = nconc2 (translation_table,
5915                                         Fcons (standard, Qnil));
5916           else
5917             translation_table = Fcons (translation_table,
5918                                        Fcons (standard, Qnil));
5919         }
5920     }
5921
5922   if (max_lookup)
5923     {
5924       *max_lookup = 1;
5925       if (CHAR_TABLE_P (translation_table)
5926           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5927         {
5928           val = XCHAR_TABLE (translation_table)->extras[1];
5929           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5930             *max_lookup = XFASTINT (val);
5931         }
5932       else if (CONSP (translation_table))
5933         {
5934           Lisp_Object tail, val;
5935
5936           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5937             if (CHAR_TABLE_P (XCAR (tail))
5938                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5939               {
5940                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5941                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5942                   *max_lookup = XFASTINT (val);
5943               }
5944         }
5945     }
5946   return translation_table;
5947 }
5948
5949 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
5950   do {                                                          \
5951     trans = Qnil;                                               \
5952     if (CHAR_TABLE_P (table))                                   \
5953       {                                                         \
5954         trans = CHAR_TABLE_REF (table, c);                      \
5955         if (CHARACTERP (trans))                                 \
5956           c = XFASTINT (trans), trans = Qnil;                   \
5957       }                                                         \
5958     else if (CONSP (table))                                     \
5959       {                                                         \
5960         Lisp_Object tail;                                       \
5961                                                                 \
5962         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
5963           if (CHAR_TABLE_P (XCAR (tail)))                       \
5964             {                                                   \
5965               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
5966               if (CHARACTERP (trans))                           \
5967                 c = XFASTINT (trans), trans = Qnil;             \
5968               else if (! NILP (trans))                          \
5969                 break;                                          \
5970             }                                                   \
5971       }                                                         \
5972   } while (0)
5973
5974
5975 static Lisp_Object
5976 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5977      Lisp_Object val;
5978      int *buf, *buf_end;
5979      int last_block;
5980      int *from_nchars, *to_nchars;
5981 {
5982   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
5983      [TO-CHAR ...].  */
5984   if (CONSP (val))
5985     {
5986       Lisp_Object from, tail;
5987       int i, len;
5988
5989       for (tail = val; CONSP (tail); tail = XCDR (tail))
5990         {
5991           val = XCAR (tail);
5992           from = XCAR (val);
5993           len = ASIZE (from);
5994           for (i = 0; i < len; i++)
5995             {
5996               if (buf + i == buf_end)
5997                 {
5998                   if (! last_block)
5999                     return Qt;
6000                   break;
6001                 }
6002               if (XINT (AREF (from, i)) != buf[i])
6003                 break;
6004             }
6005           if (i == len)
6006             {
6007               val = XCDR (val);
6008               *from_nchars = len;
6009               break;
6010             }
6011         }
6012       if (! CONSP (tail))
6013         return Qnil;
6014     }
6015   if (VECTORP (val))
6016     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6017   else
6018     *buf = XINT (val);
6019   return val;
6020 }
6021
6022
6023 static int
6024 produce_chars (coding, translation_table, last_block)
6025      struct coding_system *coding;
6026      Lisp_Object translation_table;
6027      int last_block;
6028 {
6029   unsigned char *dst = coding->destination + coding->produced;
6030   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6031   EMACS_INT produced;
6032   EMACS_INT produced_chars = 0;
6033   int carryover = 0;
6034
6035   if (! coding->chars_at_source)
6036     {
6037       /* Source characters are in coding->charbuf.  */
6038       int *buf = coding->charbuf;
6039       int *buf_end = buf + coding->charbuf_used;
6040
6041       if (EQ (coding->src_object, coding->dst_object))
6042         {
6043           coding_set_source (coding);
6044           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6045         }
6046
6047       while (buf < buf_end)
6048         {
6049           int c = *buf, i;
6050
6051           if (c >= 0)
6052             {
6053               int from_nchars = 1, to_nchars = 1;
6054               Lisp_Object trans = Qnil;
6055
6056               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6057               if (! NILP (trans))
6058                 {
6059                   trans = get_translation (trans, buf, buf_end, last_block,
6060                                            &from_nchars, &to_nchars);
6061                   if (EQ (trans, Qt))
6062                     break;
6063                   c = *buf;
6064                 }
6065
6066               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6067                 {
6068                   dst = alloc_destination (coding,
6069                                            buf_end - buf
6070                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6071                                            dst);
6072                   if (EQ (coding->src_object, coding->dst_object))
6073                     {
6074                       coding_set_source (coding);
6075                       dst_end = ((unsigned char *) coding->source) + coding->consumed;
6076                     }
6077                   else
6078                     dst_end = coding->destination + coding->dst_bytes;
6079                 }
6080
6081               for (i = 0; i < to_nchars; i++)
6082                 {
6083                   if (i > 0)
6084                     c = XINT (AREF (trans, i));
6085                   if (coding->dst_multibyte
6086                       || ! CHAR_BYTE8_P (c))
6087                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6088                   else
6089                     *dst++ = CHAR_TO_BYTE8 (c);
6090                 }
6091               produced_chars += to_nchars;
6092               *buf++ = to_nchars;
6093               while (--from_nchars > 0)
6094                 *buf++ = 0;
6095             }
6096           else
6097             /* This is an annotation datum.  (-C) is the length.  */
6098             buf += -c;
6099         }
6100       carryover = buf_end - buf;
6101     }
6102   else
6103     {
6104       /* Source characters are at coding->source.  */
6105       const unsigned char *src = coding->source;
6106       const unsigned char *src_end = src + coding->consumed;
6107
6108       if (EQ (coding->dst_object, coding->src_object))
6109         dst_end = (unsigned char *) src;
6110       if (coding->src_multibyte != coding->dst_multibyte)
6111         {
6112           if (coding->src_multibyte)
6113             {
6114               int multibytep = 1;
6115               EMACS_INT consumed_chars;
6116
6117               while (1)
6118                 {
6119                   const unsigned char *src_base = src;
6120                   int c;
6121
6122                   ONE_MORE_BYTE (c);
6123                   if (dst == dst_end)
6124                     {
6125                       if (EQ (coding->src_object, coding->dst_object))
6126                         dst_end = (unsigned char *) src;
6127                       if (dst == dst_end)
6128                         {
6129                           EMACS_INT offset = src - coding->source;
6130
6131                           dst = alloc_destination (coding, src_end - src + 1,
6132                                                    dst);
6133                           dst_end = coding->destination + coding->dst_bytes;
6134                           coding_set_source (coding);
6135                           src = coding->source + offset;
6136                           src_end = coding->source + coding->src_bytes;
6137                           if (EQ (coding->src_object, coding->dst_object))
6138                             dst_end = (unsigned char *) src;
6139                         }
6140                     }
6141                   *dst++ = c;
6142                   produced_chars++;
6143                 }
6144             no_more_source:
6145               ;
6146             }
6147           else
6148             while (src < src_end)
6149               {
6150                 int multibytep = 1;
6151                 int c = *src++;
6152
6153                 if (dst >= dst_end - 1)
6154                   {
6155                     if (EQ (coding->src_object, coding->dst_object))
6156                       dst_end = (unsigned char *) src;
6157                     if (dst >= dst_end - 1)
6158                       {
6159                         EMACS_INT offset = src - coding->source;
6160                         EMACS_INT more_bytes;
6161
6162                         if (EQ (coding->src_object, coding->dst_object))
6163                           more_bytes = ((src_end - src) / 2) + 2;
6164                         else
6165                           more_bytes = src_end - src + 2;
6166                         dst = alloc_destination (coding, more_bytes, dst);
6167                         dst_end = coding->destination + coding->dst_bytes;
6168                         coding_set_source (coding);
6169                         src = coding->source + offset;
6170                         src_end = coding->source + coding->src_bytes;
6171                         if (EQ (coding->src_object, coding->dst_object))
6172                           dst_end = (unsigned char *) src;
6173                       }
6174                   }
6175                 EMIT_ONE_BYTE (c);
6176               }
6177         }
6178       else
6179         {
6180           if (!EQ (coding->src_object, coding->dst_object))
6181             {
6182               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6183
6184               if (require > 0)
6185                 {
6186                   EMACS_INT offset = src - coding->source;
6187
6188                   dst = alloc_destination (coding, require, dst);
6189                   coding_set_source (coding);
6190                   src = coding->source + offset;
6191                   src_end = coding->source + coding->src_bytes;
6192                 }
6193             }
6194           produced_chars = coding->consumed_char;
6195           while (src < src_end)
6196             *dst++ = *src++;
6197         }
6198     }
6199
6200   produced = dst - (coding->destination + coding->produced);
6201   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6202     insert_from_gap (produced_chars, produced);
6203   coding->produced += produced;
6204   coding->produced_char += produced_chars;
6205   return carryover;
6206 }
6207
6208 /* Compose text in CODING->object according to the annotation data at
6209    CHARBUF.  CHARBUF is an array:
6210      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
6211  */
6212
6213 static INLINE void
6214 produce_composition (coding, charbuf, pos)
6215      struct coding_system *coding;
6216      int *charbuf;
6217      EMACS_INT pos;
6218 {
6219   int len;
6220   EMACS_INT to;
6221   enum composition_method method;
6222   Lisp_Object components;
6223
6224   len = -charbuf[0];
6225   to = pos + charbuf[2];
6226   if (to <= pos)
6227     return;
6228   method = (enum composition_method) (charbuf[3]);
6229
6230   if (method == COMPOSITION_RELATIVE)
6231     components = Qnil;
6232   else if (method >= COMPOSITION_WITH_RULE
6233            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6234     {
6235       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6236       int i;
6237
6238       len -= 4;
6239       charbuf += 4;
6240       for (i = 0; i < len; i++)
6241         {
6242           args[i] = make_number (charbuf[i]);
6243           if (charbuf[i] < 0)
6244             return;
6245         }
6246       components = (method == COMPOSITION_WITH_ALTCHARS
6247                     ? Fstring (len, args) : Fvector (len, args));
6248     }
6249   else
6250     return;
6251   compose_text (pos, to, components, Qnil, coding->dst_object);
6252 }
6253
6254
6255 /* Put `charset' property on text in CODING->object according to
6256    the annotation data at CHARBUF.  CHARBUF is an array:
6257      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6258  */
6259
6260 static INLINE void
6261 produce_charset (coding, charbuf, pos)
6262      struct coding_system *coding;
6263      int *charbuf;
6264      EMACS_INT pos;
6265 {
6266   EMACS_INT from = pos - charbuf[2];
6267   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6268
6269   Fput_text_property (make_number (from), make_number (pos),
6270                       Qcharset, CHARSET_NAME (charset),
6271                       coding->dst_object);
6272 }
6273
6274
6275 #define CHARBUF_SIZE 0x4000
6276
6277 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
6278   do {                                                                  \
6279     int size = CHARBUF_SIZE;;                                           \
6280                                                                         \
6281     coding->charbuf = NULL;                                             \
6282     while (size > 1024)                                                 \
6283       {                                                                 \
6284         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
6285         if (coding->charbuf)                                            \
6286           break;                                                        \
6287         size >>= 1;                                                     \
6288       }                                                                 \
6289     if (! coding->charbuf)                                              \
6290       {                                                                 \
6291         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
6292         return coding->result;                                          \
6293       }                                                                 \
6294     coding->charbuf_size = size;                                        \
6295   } while (0)
6296
6297
6298 static void
6299 produce_annotation (coding, pos)
6300      struct coding_system *coding;
6301      EMACS_INT pos;
6302 {
6303   int *charbuf = coding->charbuf;
6304   int *charbuf_end = charbuf + coding->charbuf_used;
6305
6306   if (NILP (coding->dst_object))
6307     return;
6308
6309   while (charbuf < charbuf_end)
6310     {
6311       if (*charbuf >= 0)
6312         pos += *charbuf++;
6313       else
6314         {
6315           int len = -*charbuf;
6316           switch (charbuf[1])
6317             {
6318             case CODING_ANNOTATE_COMPOSITION_MASK:
6319               produce_composition (coding, charbuf, pos);
6320               break;
6321             case CODING_ANNOTATE_CHARSET_MASK:
6322               produce_charset (coding, charbuf, pos);
6323               break;
6324             default:
6325               abort ();
6326             }
6327           charbuf += len;
6328         }
6329     }
6330 }
6331
6332 /* Decode the data at CODING->src_object into CODING->dst_object.
6333    CODING->src_object is a buffer, a string, or nil.
6334    CODING->dst_object is a buffer.
6335
6336    If CODING->src_object is a buffer, it must be the current buffer.
6337    In this case, if CODING->src_pos is positive, it is a position of
6338    the source text in the buffer, otherwise, the source text is in the
6339    gap area of the buffer, and CODING->src_pos specifies the offset of
6340    the text from GPT (which must be the same as PT).  If this is the
6341    same buffer as CODING->dst_object, CODING->src_pos must be
6342    negative.
6343
6344    If CODING->src_object is a string, CODING->src_pos is an index to
6345    that string.
6346
6347    If CODING->src_object is nil, CODING->source must already point to
6348    the non-relocatable memory area.  In this case, CODING->src_pos is
6349    an offset from CODING->source.
6350
6351    The decoded data is inserted at the current point of the buffer
6352    CODING->dst_object.
6353 */
6354
6355 static int
6356 decode_coding (coding)
6357      struct coding_system *coding;
6358 {
6359   Lisp_Object attrs;
6360   Lisp_Object undo_list;
6361   Lisp_Object translation_table;
6362   int carryover;
6363   int i;
6364
6365   if (BUFFERP (coding->src_object)
6366       && coding->src_pos > 0
6367       && coding->src_pos < GPT
6368       && coding->src_pos + coding->src_chars > GPT)
6369     move_gap_both (coding->src_pos, coding->src_pos_byte);
6370
6371   undo_list = Qt;
6372   if (BUFFERP (coding->dst_object))
6373     {
6374       if (current_buffer != XBUFFER (coding->dst_object))
6375         set_buffer_internal (XBUFFER (coding->dst_object));
6376       if (GPT != PT)
6377         move_gap_both (PT, PT_BYTE);
6378       undo_list = current_buffer->undo_list;
6379       current_buffer->undo_list = Qt;
6380     }
6381
6382   coding->consumed = coding->consumed_char = 0;
6383   coding->produced = coding->produced_char = 0;
6384   coding->chars_at_source = 0;
6385   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6386   coding->errors = 0;
6387
6388   ALLOC_CONVERSION_WORK_AREA (coding);
6389
6390   attrs = CODING_ID_ATTRS (coding->id);
6391   translation_table = get_translation_table (attrs, 0, NULL);
6392
6393   carryover = 0;
6394   do
6395     {
6396       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6397
6398       coding_set_source (coding);
6399       coding->annotated = 0;
6400       coding->charbuf_used = carryover;
6401       (*(coding->decoder)) (coding);
6402       coding_set_destination (coding);
6403       carryover = produce_chars (coding, translation_table, 0);
6404       if (coding->annotated)
6405         produce_annotation (coding, pos);
6406       for (i = 0; i < carryover; i++)
6407         coding->charbuf[i]
6408           = coding->charbuf[coding->charbuf_used - carryover + i];
6409     }
6410   while (coding->consumed < coding->src_bytes
6411          && (coding->result == CODING_RESULT_SUCCESS
6412              || coding->result == CODING_RESULT_INVALID_SRC));
6413
6414   if (carryover > 0)
6415     {
6416       coding_set_destination (coding);
6417       coding->charbuf_used = carryover;
6418       produce_chars (coding, translation_table, 1);
6419     }
6420
6421   coding->carryover_bytes = 0;
6422   if (coding->consumed < coding->src_bytes)
6423     {
6424       int nbytes = coding->src_bytes - coding->consumed;
6425       const unsigned char *src;
6426
6427       coding_set_source (coding);
6428       coding_set_destination (coding);
6429       src = coding->source + coding->consumed;
6430
6431       if (coding->mode & CODING_MODE_LAST_BLOCK)
6432         {
6433           /* Flush out unprocessed data as binary chars.  We are sure
6434              that the number of data is less than the size of
6435              coding->charbuf.  */
6436           coding->charbuf_used = 0;
6437           while (nbytes-- > 0)
6438             {
6439               int c = *src++;
6440
6441               if (c & 0x80)
6442                 c = BYTE8_TO_CHAR (c);
6443               coding->charbuf[coding->charbuf_used++] = c;
6444             }
6445           produce_chars (coding, Qnil, 1);
6446         }
6447       else
6448         {
6449           /* Record unprocessed bytes in coding->carryover.  We are
6450              sure that the number of data is less than the size of
6451              coding->carryover.  */
6452           unsigned char *p = coding->carryover;
6453
6454           coding->carryover_bytes = nbytes;
6455           while (nbytes-- > 0)
6456             *p++ = *src++;
6457         }
6458       coding->consumed = coding->src_bytes;
6459     }
6460
6461   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6462     decode_eol (coding);
6463   if (BUFFERP (coding->dst_object))
6464     {
6465       current_buffer->undo_list = undo_list;
6466       record_insert (coding->dst_pos, coding->produced_char);
6467     }
6468   return coding->result;
6469 }
6470
6471
6472 /* Extract an annotation datum from a composition starting at POS and
6473    ending before LIMIT of CODING->src_object (buffer or string), store
6474    the data in BUF, set *STOP to a starting position of the next
6475    composition (if any) or to LIMIT, and return the address of the
6476    next element of BUF.
6477
6478    If such an annotation is not found, set *STOP to a starting
6479    position of a composition after POS (if any) or to LIMIT, and
6480    return BUF.  */
6481
6482 static INLINE int *
6483 handle_composition_annotation (pos, limit, coding, buf, stop)
6484      EMACS_INT pos, limit;
6485      struct coding_system *coding;
6486      int *buf;
6487      EMACS_INT *stop;
6488 {
6489   EMACS_INT start, end;
6490   Lisp_Object prop;
6491
6492   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6493       || end > limit)
6494     *stop = limit;
6495   else if (start > pos)
6496     *stop = start;
6497   else
6498     {
6499       if (start == pos)
6500         {
6501           /* We found a composition.  Store the corresponding
6502              annotation data in BUF.  */
6503           int *head = buf;
6504           enum composition_method method = COMPOSITION_METHOD (prop);
6505           int nchars = COMPOSITION_LENGTH (prop);
6506
6507           ADD_COMPOSITION_DATA (buf, nchars, method);
6508           if (method != COMPOSITION_RELATIVE)
6509             {
6510               Lisp_Object components;
6511               int len, i, i_byte;
6512
6513               components = COMPOSITION_COMPONENTS (prop);
6514               if (VECTORP (components))
6515                 {
6516                   len = XVECTOR (components)->size;
6517                   for (i = 0; i < len; i++)
6518                     *buf++ = XINT (AREF (components, i));
6519                 }
6520               else if (STRINGP (components))
6521                 {
6522                   len = SCHARS (components);
6523                   i = i_byte = 0;
6524                   while (i < len)
6525                     {
6526                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6527                       buf++;
6528                     }
6529                 }
6530               else if (INTEGERP (components))
6531                 {
6532                   len = 1;
6533                   *buf++ = XINT (components);
6534                 }
6535               else if (CONSP (components))
6536                 {
6537                   for (len = 0; CONSP (components);
6538                        len++, components = XCDR (components))
6539                     *buf++ = XINT (XCAR (components));
6540                 }
6541               else
6542                 abort ();
6543               *head -= len;
6544             }
6545         }
6546
6547       if (find_composition (end, limit, &start, &end, &prop,
6548                             coding->src_object)
6549           && end <= limit)
6550         *stop = start;
6551       else
6552         *stop = limit;
6553     }
6554   return buf;
6555 }
6556
6557
6558 /* Extract an annotation datum from a text property `charset' at POS of
6559    CODING->src_object (buffer of string), store the data in BUF, set
6560    *STOP to the position where the value of `charset' property changes
6561    (limiting by LIMIT), and return the address of the next element of
6562    BUF.
6563
6564    If the property value is nil, set *STOP to the position where the
6565    property value is non-nil (limiting by LIMIT), and return BUF.  */
6566
6567 static INLINE int *
6568 handle_charset_annotation (pos, limit, coding, buf, stop)
6569      EMACS_INT pos, limit;
6570      struct coding_system *coding;
6571      int *buf;
6572      EMACS_INT *stop;
6573 {
6574   Lisp_Object val, next;
6575   int id;
6576
6577   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6578   if (! NILP (val) && CHARSETP (val))
6579     id = XINT (CHARSET_SYMBOL_ID (val));
6580   else
6581     id = -1;
6582   ADD_CHARSET_DATA (buf, 0, id);
6583   next = Fnext_single_property_change (make_number (pos), Qcharset,
6584                                        coding->src_object,
6585                                        make_number (limit));
6586   *stop = XINT (next);
6587   return buf;
6588 }
6589
6590
6591 static void
6592 consume_chars (coding, translation_table, max_lookup)
6593      struct coding_system *coding;
6594      Lisp_Object translation_table;
6595      int max_lookup;
6596 {
6597   int *buf = coding->charbuf;
6598   int *buf_end = coding->charbuf + coding->charbuf_size;
6599   const unsigned char *src = coding->source + coding->consumed;
6600   const unsigned char *src_end = coding->source + coding->src_bytes;
6601   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6602   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6603   int multibytep = coding->src_multibyte;
6604   Lisp_Object eol_type;
6605   int c;
6606   EMACS_INT stop, stop_composition, stop_charset;
6607   int *lookup_buf = NULL;
6608
6609   if (! NILP (translation_table))
6610     lookup_buf = alloca (sizeof (int) * max_lookup);
6611
6612   eol_type = CODING_ID_EOL_TYPE (coding->id);
6613   if (VECTORP (eol_type))
6614     eol_type = Qunix;
6615
6616   /* Note: composition handling is not yet implemented.  */
6617   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6618
6619   if (NILP (coding->src_object))
6620     stop = stop_composition = stop_charset = end_pos;
6621   else
6622     {
6623       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6624         stop = stop_composition = pos;
6625       else
6626         stop = stop_composition = end_pos;
6627       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6628         stop = stop_charset = pos;
6629       else
6630         stop_charset = end_pos;
6631     }
6632
6633   /* Compensate for CRLF and conversion.  */
6634   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6635   while (buf < buf_end)
6636     {
6637       Lisp_Object trans;
6638
6639       if (pos == stop)
6640         {
6641           if (pos == end_pos)
6642             break;
6643           if (pos == stop_composition)
6644             buf = handle_composition_annotation (pos, end_pos, coding,
6645                                                  buf, &stop_composition);
6646           if (pos == stop_charset)
6647             buf = handle_charset_annotation (pos, end_pos, coding,
6648                                              buf, &stop_charset);
6649           stop = (stop_composition < stop_charset
6650                   ? stop_composition : stop_charset);
6651         }
6652
6653       if (! multibytep)
6654         {
6655           EMACS_INT bytes;
6656
6657           if (coding->encoder == encode_coding_raw_text)
6658             c = *src++, pos++;
6659           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6660             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
6661           else
6662             c = BYTE8_TO_CHAR (*src), src++, pos++;
6663         }
6664       else
6665         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
6666       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6667         c = '\n';
6668       if (! EQ (eol_type, Qunix))
6669         {
6670           if (c == '\n')
6671             {
6672               if (EQ (eol_type, Qdos))
6673                 *buf++ = '\r';
6674               else
6675                 c = '\r';
6676             }
6677         }
6678
6679       trans = Qnil;
6680       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6681       if (NILP (trans))
6682         *buf++ = c;
6683       else
6684         {
6685           int from_nchars = 1, to_nchars = 1;
6686           int *lookup_buf_end;
6687           const unsigned char *p = src;
6688           int i;
6689
6690           lookup_buf[0] = c;
6691           for (i = 1; i < max_lookup && p < src_end; i++)
6692             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6693           lookup_buf_end = lookup_buf + i;
6694           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6695                                    &from_nchars, &to_nchars);
6696           if (EQ (trans, Qt)
6697               || buf + to_nchars > buf_end)
6698             break;
6699           *buf++ = *lookup_buf;
6700           for (i = 1; i < to_nchars; i++)
6701             *buf++ = XINT (AREF (trans, i));
6702           for (i = 1; i < from_nchars; i++, pos++)
6703             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6704         }
6705     }
6706
6707   coding->consumed = src - coding->source;
6708   coding->consumed_char = pos - coding->src_pos;
6709   coding->charbuf_used = buf - coding->charbuf;
6710   coding->chars_at_source = 0;
6711 }
6712
6713
6714 /* Encode the text at CODING->src_object into CODING->dst_object.
6715    CODING->src_object is a buffer or a string.
6716    CODING->dst_object is a buffer or nil.
6717
6718    If CODING->src_object is a buffer, it must be the current buffer.
6719    In this case, if CODING->src_pos is positive, it is a position of
6720    the source text in the buffer, otherwise. the source text is in the
6721    gap area of the buffer, and coding->src_pos specifies the offset of
6722    the text from GPT (which must be the same as PT).  If this is the
6723    same buffer as CODING->dst_object, CODING->src_pos must be
6724    negative and CODING should not have `pre-write-conversion'.
6725
6726    If CODING->src_object is a string, CODING should not have
6727    `pre-write-conversion'.
6728
6729    If CODING->dst_object is a buffer, the encoded data is inserted at
6730    the current point of that buffer.
6731
6732    If CODING->dst_object is nil, the encoded data is placed at the
6733    memory area specified by CODING->destination.  */
6734
6735 static int
6736 encode_coding (coding)
6737      struct coding_system *coding;
6738 {
6739   Lisp_Object attrs;
6740   Lisp_Object translation_table;
6741   int max_lookup;
6742
6743   attrs = CODING_ID_ATTRS (coding->id);
6744   if (coding->encoder == encode_coding_raw_text)
6745     translation_table = Qnil, max_lookup = 0;
6746   else
6747     translation_table = get_translation_table (attrs, 1, &max_lookup);
6748
6749   if (BUFFERP (coding->dst_object))
6750     {
6751       set_buffer_internal (XBUFFER (coding->dst_object));
6752       coding->dst_multibyte
6753         = ! NILP (current_buffer->enable_multibyte_characters);
6754     }
6755
6756   coding->consumed = coding->consumed_char = 0;
6757   coding->produced = coding->produced_char = 0;
6758   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6759   coding->errors = 0;
6760
6761   ALLOC_CONVERSION_WORK_AREA (coding);
6762
6763   do {
6764     coding_set_source (coding);
6765     consume_chars (coding, translation_table, max_lookup);
6766     coding_set_destination (coding);
6767     (*(coding->encoder)) (coding);
6768   } while (coding->consumed_char < coding->src_chars);
6769
6770   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
6771     insert_from_gap (coding->produced_char, coding->produced);
6772
6773   return (coding->result);
6774 }
6775
6776
6777 /* Name (or base name) of work buffer for code conversion.  */
6778 static Lisp_Object Vcode_conversion_workbuf_name;
6779
6780 /* A working buffer used by the top level conversion.  Once it is
6781    created, it is never destroyed.  It has the name
6782    Vcode_conversion_workbuf_name.  The other working buffers are
6783    destroyed after the use is finished, and their names are modified
6784    versions of Vcode_conversion_workbuf_name.  */
6785 static Lisp_Object Vcode_conversion_reused_workbuf;
6786
6787 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
6788 static int reused_workbuf_in_use;
6789
6790
6791 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6792    multibyteness of returning buffer.  */
6793
6794 static Lisp_Object
6795 make_conversion_work_buffer (multibyte)
6796      int multibyte;
6797 {
6798   Lisp_Object name, workbuf;
6799   struct buffer *current;
6800
6801   if (reused_workbuf_in_use++)
6802     {
6803       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6804       workbuf = Fget_buffer_create (name);
6805     }
6806   else
6807     {
6808       name = Vcode_conversion_workbuf_name;
6809       workbuf = Fget_buffer_create (name);
6810       if (NILP (Vcode_conversion_reused_workbuf))
6811         Vcode_conversion_reused_workbuf = workbuf;
6812     }
6813   current = current_buffer;
6814   set_buffer_internal (XBUFFER (workbuf));
6815   Ferase_buffer ();
6816   current_buffer->undo_list = Qt;
6817   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6818   set_buffer_internal (current);
6819   return workbuf;
6820 }
6821
6822
6823 static Lisp_Object
6824 code_conversion_restore (arg)
6825      Lisp_Object arg;
6826 {
6827   Lisp_Object current, workbuf;
6828   struct gcpro gcpro1;
6829
6830   GCPRO1 (arg);
6831   current = XCAR (arg);
6832   workbuf = XCDR (arg);
6833   if (! NILP (workbuf))
6834     {
6835       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6836         reused_workbuf_in_use = 0;
6837       else if (! NILP (Fbuffer_live_p (workbuf)))
6838         Fkill_buffer (workbuf);
6839     }
6840   set_buffer_internal (XBUFFER (current));
6841   UNGCPRO;
6842   return Qnil;
6843 }
6844
6845 Lisp_Object
6846 code_conversion_save (with_work_buf, multibyte)
6847      int with_work_buf, multibyte;
6848 {
6849   Lisp_Object workbuf = Qnil;
6850
6851   if (with_work_buf)
6852     workbuf = make_conversion_work_buffer (multibyte);
6853   record_unwind_protect (code_conversion_restore,
6854                          Fcons (Fcurrent_buffer (), workbuf));
6855   return workbuf;
6856 }
6857
6858 int
6859 decode_coding_gap (coding, chars, bytes)
6860      struct coding_system *coding;
6861      EMACS_INT chars, bytes;
6862 {
6863   int count = specpdl_ptr - specpdl;
6864   Lisp_Object attrs;
6865
6866   code_conversion_save (0, 0);
6867
6868   coding->src_object = Fcurrent_buffer ();
6869   coding->src_chars = chars;
6870   coding->src_bytes = bytes;
6871   coding->src_pos = -chars;
6872   coding->src_pos_byte = -bytes;
6873   coding->src_multibyte = chars < bytes;
6874   coding->dst_object = coding->src_object;
6875   coding->dst_pos = PT;
6876   coding->dst_pos_byte = PT_BYTE;
6877   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6878
6879   if (CODING_REQUIRE_DETECTION (coding))
6880     detect_coding (coding);
6881
6882   coding->mode |= CODING_MODE_LAST_BLOCK;
6883   current_buffer->text->inhibit_shrinking = 1;
6884   decode_coding (coding);
6885   current_buffer->text->inhibit_shrinking = 0;
6886
6887   attrs = CODING_ID_ATTRS (coding->id);
6888   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6889     {
6890       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6891       Lisp_Object val;
6892
6893       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6894       val = call1 (CODING_ATTR_POST_READ (attrs),
6895                    make_number (coding->produced_char));
6896       CHECK_NATNUM (val);
6897       coding->produced_char += Z - prev_Z;
6898       coding->produced += Z_BYTE - prev_Z_BYTE;
6899     }
6900
6901   unbind_to (count, Qnil);
6902   return coding->result;
6903 }
6904
6905 int
6906 encode_coding_gap (coding, chars, bytes)
6907      struct coding_system *coding;
6908      EMACS_INT chars, bytes;
6909 {
6910   int count = specpdl_ptr - specpdl;
6911
6912   code_conversion_save (0, 0);
6913
6914   coding->src_object = Fcurrent_buffer ();
6915   coding->src_chars = chars;
6916   coding->src_bytes = bytes;
6917   coding->src_pos = -chars;
6918   coding->src_pos_byte = -bytes;
6919   coding->src_multibyte = chars < bytes;
6920   coding->dst_object = coding->src_object;
6921   coding->dst_pos = PT;
6922   coding->dst_pos_byte = PT_BYTE;
6923
6924   encode_coding (coding);
6925
6926   unbind_to (count, Qnil);
6927   return coding->result;
6928 }
6929
6930
6931 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6932    SRC_OBJECT into DST_OBJECT by coding context CODING.
6933
6934    SRC_OBJECT is a buffer, a string, or Qnil.
6935
6936    If it is a buffer, the text is at point of the buffer.  FROM and TO
6937    are positions in the buffer.
6938
6939    If it is a string, the text is at the beginning of the string.
6940    FROM and TO are indices to the string.
6941
6942    If it is nil, the text is at coding->source.  FROM and TO are
6943    indices to coding->source.
6944
6945    DST_OBJECT is a buffer, Qt, or Qnil.
6946
6947    If it is a buffer, the decoded text is inserted at point of the
6948    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6949    is deleted.
6950
6951    If it is Qt, a string is made from the decoded text, and
6952    set in CODING->dst_object.
6953
6954    If it is Qnil, the decoded text is stored at CODING->destination.
6955    The caller must allocate CODING->dst_bytes bytes at
6956    CODING->destination by xmalloc.  If the decoded text is longer than
6957    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6958  */
6959
6960 void
6961 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6962                       dst_object)
6963      struct coding_system *coding;
6964      Lisp_Object src_object;
6965      EMACS_INT from, from_byte, to, to_byte;
6966      Lisp_Object dst_object;
6967 {
6968   int count = specpdl_ptr - specpdl;
6969   unsigned char *destination;
6970   EMACS_INT dst_bytes;
6971   EMACS_INT chars = to - from;
6972   EMACS_INT bytes = to_byte - from_byte;
6973   Lisp_Object attrs;
6974   int saved_pt = -1, saved_pt_byte;
6975   int need_marker_adjustment = 0;
6976   Lisp_Object old_deactivate_mark;
6977
6978   old_deactivate_mark = Vdeactivate_mark;
6979
6980   if (NILP (dst_object))
6981     {
6982       destination = coding->destination;
6983       dst_bytes = coding->dst_bytes;
6984     }
6985
6986   coding->src_object = src_object;
6987   coding->src_chars = chars;
6988   coding->src_bytes = bytes;
6989   coding->src_multibyte = chars < bytes;
6990
6991   if (STRINGP (src_object))
6992     {
6993       coding->src_pos = from;
6994       coding->src_pos_byte = from_byte;
6995     }
6996   else if (BUFFERP (src_object))
6997     {
6998       set_buffer_internal (XBUFFER (src_object));
6999       if (from != GPT)
7000         move_gap_both (from, from_byte);
7001       if (EQ (src_object, dst_object))
7002         {
7003           struct Lisp_Marker *tail;
7004
7005           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7006             {
7007               tail->need_adjustment
7008                 = tail->charpos == (tail->insertion_type ? from : to);
7009               need_marker_adjustment |= tail->need_adjustment;
7010             }
7011           saved_pt = PT, saved_pt_byte = PT_BYTE;
7012           TEMP_SET_PT_BOTH (from, from_byte);
7013           current_buffer->text->inhibit_shrinking = 1;
7014           del_range_both (from, from_byte, to, to_byte, 1);
7015           coding->src_pos = -chars;
7016           coding->src_pos_byte = -bytes;
7017         }
7018       else
7019         {
7020           coding->src_pos = from;
7021           coding->src_pos_byte = from_byte;
7022         }
7023     }
7024
7025   if (CODING_REQUIRE_DETECTION (coding))
7026     detect_coding (coding);
7027   attrs = CODING_ID_ATTRS (coding->id);
7028
7029   if (EQ (dst_object, Qt)
7030       || (! NILP (CODING_ATTR_POST_READ (attrs))
7031           && NILP (dst_object)))
7032     {
7033       coding->dst_object = code_conversion_save (1, 1);
7034       coding->dst_pos = BEG;
7035       coding->dst_pos_byte = BEG_BYTE;
7036       coding->dst_multibyte = 1;
7037     }
7038   else if (BUFFERP (dst_object))
7039     {
7040       code_conversion_save (0, 0);
7041       coding->dst_object = dst_object;
7042       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7043       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7044       coding->dst_multibyte
7045         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7046     }
7047   else
7048     {
7049       code_conversion_save (0, 0);
7050       coding->dst_object = Qnil;
7051       coding->dst_multibyte = 1;
7052     }
7053
7054   decode_coding (coding);
7055
7056   if (BUFFERP (coding->dst_object))
7057     set_buffer_internal (XBUFFER (coding->dst_object));
7058
7059   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7060     {
7061       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7062       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7063       Lisp_Object val;
7064
7065       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7066       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7067               old_deactivate_mark);
7068       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7069                         make_number (coding->produced_char));
7070       UNGCPRO;
7071       CHECK_NATNUM (val);
7072       coding->produced_char += Z - prev_Z;
7073       coding->produced += Z_BYTE - prev_Z_BYTE;
7074     }
7075
7076   if (EQ (dst_object, Qt))
7077     {
7078       coding->dst_object = Fbuffer_string ();
7079     }
7080   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7081     {
7082       set_buffer_internal (XBUFFER (coding->dst_object));
7083       if (dst_bytes < coding->produced)
7084         {
7085           destination = xrealloc (destination, coding->produced);
7086           if (! destination)
7087             {
7088               record_conversion_result (coding,
7089                                         CODING_RESULT_INSUFFICIENT_DST);
7090               unbind_to (count, Qnil);
7091               return;
7092             }
7093           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7094             move_gap_both (BEGV, BEGV_BYTE);
7095           bcopy (BEGV_ADDR, destination, coding->produced);
7096           coding->destination = destination;
7097         }
7098     }
7099
7100   if (saved_pt >= 0)
7101     {
7102       /* This is the case of:
7103          (BUFFERP (src_object) && EQ (src_object, dst_object))
7104          As we have moved PT while replacing the original buffer
7105          contents, we must recover it now.  */
7106       set_buffer_internal (XBUFFER (src_object));
7107       current_buffer->text->inhibit_shrinking = 0;
7108       if (saved_pt < from)
7109         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7110       else if (saved_pt < from + chars)
7111         TEMP_SET_PT_BOTH (from, from_byte);
7112       else if (! NILP (current_buffer->enable_multibyte_characters))
7113         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7114                           saved_pt_byte + (coding->produced - bytes));
7115       else
7116         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7117                           saved_pt_byte + (coding->produced - bytes));
7118
7119       if (need_marker_adjustment)
7120         {
7121           struct Lisp_Marker *tail;
7122
7123           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7124             if (tail->need_adjustment)
7125               {
7126                 tail->need_adjustment = 0;
7127                 if (tail->insertion_type)
7128                   {
7129                     tail->bytepos = from_byte;
7130                     tail->charpos = from;
7131                   }
7132                 else
7133                   {
7134                     tail->bytepos = from_byte + coding->produced;
7135                     tail->charpos
7136                       = (NILP (current_buffer->enable_multibyte_characters)
7137                          ? tail->bytepos : from + coding->produced_char);
7138                   }
7139               }
7140         }
7141     }
7142
7143   Vdeactivate_mark = old_deactivate_mark;
7144   unbind_to (count, coding->dst_object);
7145 }
7146
7147
7148 void
7149 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7150                       dst_object)
7151      struct coding_system *coding;
7152      Lisp_Object src_object;
7153      EMACS_INT from, from_byte, to, to_byte;
7154      Lisp_Object dst_object;
7155 {
7156   int count = specpdl_ptr - specpdl;
7157   EMACS_INT chars = to - from;
7158   EMACS_INT bytes = to_byte - from_byte;
7159   Lisp_Object attrs;
7160   int saved_pt = -1, saved_pt_byte;
7161   int need_marker_adjustment = 0;
7162   int kill_src_buffer = 0;
7163   Lisp_Object old_deactivate_mark;
7164
7165   old_deactivate_mark = Vdeactivate_mark;
7166
7167   coding->src_object = src_object;
7168   coding->src_chars = chars;
7169   coding->src_bytes = bytes;
7170   coding->src_multibyte = chars < bytes;
7171
7172   attrs = CODING_ID_ATTRS (coding->id);
7173
7174   if (EQ (src_object, dst_object))
7175     {
7176       struct Lisp_Marker *tail;
7177
7178       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7179         {
7180           tail->need_adjustment
7181             = tail->charpos == (tail->insertion_type ? from : to);
7182           need_marker_adjustment |= tail->need_adjustment;
7183         }
7184     }
7185
7186   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7187     {
7188       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7189       set_buffer_internal (XBUFFER (coding->src_object));
7190       if (STRINGP (src_object))
7191         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7192       else if (BUFFERP (src_object))
7193         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7194       else
7195         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7196
7197       if (EQ (src_object, dst_object))
7198         {
7199           set_buffer_internal (XBUFFER (src_object));
7200           saved_pt = PT, saved_pt_byte = PT_BYTE;
7201           del_range_both (from, from_byte, to, to_byte, 1);
7202           set_buffer_internal (XBUFFER (coding->src_object));
7203         }
7204
7205       {
7206         Lisp_Object args[3];
7207         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7208
7209         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7210                 old_deactivate_mark);
7211         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7212         args[1] = make_number (BEG);
7213         args[2] = make_number (Z);
7214         safe_call (3, args);
7215         UNGCPRO;
7216       }
7217       if (XBUFFER (coding->src_object) != current_buffer)
7218         kill_src_buffer = 1;
7219       coding->src_object = Fcurrent_buffer ();
7220       if (BEG != GPT)
7221         move_gap_both (BEG, BEG_BYTE);
7222       coding->src_chars = Z - BEG;
7223       coding->src_bytes = Z_BYTE - BEG_BYTE;
7224       coding->src_pos = BEG;
7225       coding->src_pos_byte = BEG_BYTE;
7226       coding->src_multibyte = Z < Z_BYTE;
7227     }
7228   else if (STRINGP (src_object))
7229     {
7230       code_conversion_save (0, 0);
7231       coding->src_pos = from;
7232       coding->src_pos_byte = from_byte;
7233     }
7234   else if (BUFFERP (src_object))
7235     {
7236       code_conversion_save (0, 0);
7237       set_buffer_internal (XBUFFER (src_object));
7238       if (EQ (src_object, dst_object))
7239         {
7240           saved_pt = PT, saved_pt_byte = PT_BYTE;
7241           coding->src_object = del_range_1 (from, to, 1, 1);
7242           coding->src_pos = 0;
7243           coding->src_pos_byte = 0;
7244         }
7245       else
7246         {
7247           if (from < GPT && to >= GPT)
7248             move_gap_both (from, from_byte);
7249           coding->src_pos = from;
7250           coding->src_pos_byte = from_byte;
7251         }
7252     }
7253   else
7254     code_conversion_save (0, 0);
7255
7256   if (BUFFERP (dst_object))
7257     {
7258       coding->dst_object = dst_object;
7259       if (EQ (src_object, dst_object))
7260         {
7261           coding->dst_pos = from;
7262           coding->dst_pos_byte = from_byte;
7263         }
7264       else
7265         {
7266           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7267           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7268         }
7269       coding->dst_multibyte
7270         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7271     }
7272   else if (EQ (dst_object, Qt))
7273     {
7274       coding->dst_object = Qnil;
7275       coding->dst_bytes = coding->src_chars;
7276       if (coding->dst_bytes == 0)
7277         coding->dst_bytes = 1;
7278       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7279       coding->dst_multibyte = 0;
7280     }
7281   else
7282     {
7283       coding->dst_object = Qnil;
7284       coding->dst_multibyte = 0;
7285     }
7286
7287   encode_coding (coding);
7288
7289   if (EQ (dst_object, Qt))
7290     {
7291       if (BUFFERP (coding->dst_object))
7292         coding->dst_object = Fbuffer_string ();
7293       else
7294         {
7295           coding->dst_object
7296             = make_unibyte_string ((char *) coding->destination,
7297                                    coding->produced);
7298           xfree (coding->destination);
7299         }
7300     }
7301
7302   if (saved_pt >= 0)
7303     {
7304       /* This is the case of:
7305          (BUFFERP (src_object) && EQ (src_object, dst_object))
7306          As we have moved PT while replacing the original buffer
7307          contents, we must recover it now.  */
7308       set_buffer_internal (XBUFFER (src_object));
7309       if (saved_pt < from)
7310         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7311       else if (saved_pt < from + chars)
7312         TEMP_SET_PT_BOTH (from, from_byte);
7313       else if (! NILP (current_buffer->enable_multibyte_characters))
7314         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7315                           saved_pt_byte + (coding->produced - bytes));
7316       else
7317         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7318                           saved_pt_byte + (coding->produced - bytes));
7319
7320       if (need_marker_adjustment)
7321         {
7322           struct Lisp_Marker *tail;
7323
7324           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7325             if (tail->need_adjustment)
7326               {
7327                 tail->need_adjustment = 0;
7328                 if (tail->insertion_type)
7329                   {
7330                     tail->bytepos = from_byte;
7331                     tail->charpos = from;
7332                   }
7333                 else
7334                   {
7335                     tail->bytepos = from_byte + coding->produced;
7336                     tail->charpos
7337                       = (NILP (current_buffer->enable_multibyte_characters)
7338                          ? tail->bytepos : from + coding->produced_char);
7339                   }
7340               }
7341         }
7342     }
7343
7344   if (kill_src_buffer)
7345     Fkill_buffer (coding->src_object);
7346
7347   Vdeactivate_mark = old_deactivate_mark;
7348   unbind_to (count, Qnil);
7349 }
7350
7351
7352 Lisp_Object
7353 preferred_coding_system ()
7354 {
7355   int id = coding_categories[coding_priorities[0]].id;
7356
7357   return CODING_ID_NAME (id);
7358 }
7359
7360 \f
7361 #ifdef emacs
7362 /*** 8. Emacs Lisp library functions ***/
7363
7364 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
7365        doc: /* Return t if OBJECT is nil or a coding-system.
7366 See the documentation of `define-coding-system' for information
7367 about coding-system objects.  */)
7368      (obj)
7369      Lisp_Object obj;
7370 {
7371   if (NILP (obj)
7372       || CODING_SYSTEM_ID (obj) >= 0)
7373     return Qt;
7374   if (! SYMBOLP (obj)
7375       || NILP (Fget (obj, Qcoding_system_define_form)))
7376     return Qnil;
7377   return Qt;
7378 }
7379
7380 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7381        Sread_non_nil_coding_system, 1, 1, 0,
7382        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7383      (prompt)
7384      Lisp_Object prompt;
7385 {
7386   Lisp_Object val;
7387   do
7388     {
7389       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7390                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
7391     }
7392   while (SCHARS (val) == 0);
7393   return (Fintern (val, Qnil));
7394 }
7395
7396 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7397        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7398 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7399 Ignores case when completing coding systems (all Emacs coding systems
7400 are lower-case).  */)
7401      (prompt, default_coding_system)
7402      Lisp_Object prompt, default_coding_system;
7403 {
7404   Lisp_Object val;
7405   int count = SPECPDL_INDEX ();
7406
7407   if (SYMBOLP (default_coding_system))
7408     default_coding_system = SYMBOL_NAME (default_coding_system);
7409   specbind (Qcompletion_ignore_case, Qt);
7410   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7411                           Qt, Qnil, Qcoding_system_history,
7412                           default_coding_system, Qnil);
7413   unbind_to (count, Qnil);
7414   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
7415 }
7416
7417 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7418        1, 1, 0,
7419        doc: /* Check validity of CODING-SYSTEM.
7420 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7421 It is valid if it is nil or a symbol defined as a coding system by the
7422 function `define-coding-system'.  */)
7423   (coding_system)
7424      Lisp_Object coding_system;
7425 {
7426   Lisp_Object define_form;
7427
7428   define_form = Fget (coding_system, Qcoding_system_define_form);
7429   if (! NILP (define_form))
7430     {
7431       Fput (coding_system, Qcoding_system_define_form, Qnil);
7432       safe_eval (define_form);
7433     }
7434   if (!NILP (Fcoding_system_p (coding_system)))
7435     return coding_system;
7436   xsignal1 (Qcoding_system_error, coding_system);
7437 }
7438
7439 \f
7440 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7441    HIGHEST is nonzero, return the coding system of the highest
7442    priority among the detected coding systems.  Otherwize return a
7443    list of detected coding systems sorted by their priorities.  If
7444    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7445    multibyte form but contains only ASCII and eight-bit chars.
7446    Otherwise, the bytes are raw bytes.
7447
7448    CODING-SYSTEM controls the detection as below:
7449
7450    If it is nil, detect both text-format and eol-format.  If the
7451    text-format part of CODING-SYSTEM is already specified
7452    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7453    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7454    detect only text-format.  */
7455
7456 Lisp_Object
7457 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7458                       coding_system)
7459      const unsigned char *src;
7460      EMACS_INT src_chars, src_bytes;
7461      int highest;
7462      int multibytep;
7463      Lisp_Object coding_system;
7464 {
7465   const unsigned char *src_end = src + src_bytes;
7466   Lisp_Object attrs, eol_type;
7467   Lisp_Object val;
7468   struct coding_system coding;
7469   int id;
7470   struct coding_detection_info detect_info;
7471   enum coding_category base_category;
7472
7473   if (NILP (coding_system))
7474     coding_system = Qundecided;
7475   setup_coding_system (coding_system, &coding);
7476   attrs = CODING_ID_ATTRS (coding.id);
7477   eol_type = CODING_ID_EOL_TYPE (coding.id);
7478   coding_system = CODING_ATTR_BASE_NAME (attrs);
7479
7480   coding.source = src;
7481   coding.src_chars = src_chars;
7482   coding.src_bytes = src_bytes;
7483   coding.src_multibyte = multibytep;
7484   coding.consumed = 0;
7485   coding.mode |= CODING_MODE_LAST_BLOCK;
7486
7487   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7488
7489   /* At first, detect text-format if necessary.  */
7490   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7491   if (base_category == coding_category_undecided)
7492     {
7493       enum coding_category category;
7494       struct coding_system *this;
7495       int c, i;
7496
7497       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7498       for (i = 0; src < src_end; i++, src++)
7499         {
7500           c = *src;
7501           if (c & 0x80)
7502             break;
7503           if (c < 0x20
7504               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7505               && ! inhibit_iso_escape_detection)
7506             {
7507               coding.head_ascii = src - coding.source;
7508               if (detect_coding_iso_2022 (&coding, &detect_info))
7509                 {
7510                   /* We have scanned the whole data.  */
7511                   if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7512                     /* We didn't find an 8-bit code.  */
7513                     src = src_end;
7514                   break;
7515                 }
7516             }
7517         }
7518       coding.head_ascii = src - coding.source;
7519
7520       if (src < src_end
7521           || detect_info.found)
7522         {
7523           if (src == src_end)
7524             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
7525             for (i = 0; i < coding_category_raw_text; i++)
7526               {
7527                 category = coding_priorities[i];
7528                 this = coding_categories + category;
7529                 if (detect_info.found & (1 << category))
7530                   break;
7531               }
7532           else
7533             for (i = 0; i < coding_category_raw_text; i++)
7534               {
7535                 category = coding_priorities[i];
7536                 this = coding_categories + category;
7537
7538                 if (this->id < 0)
7539                   {
7540                     /* No coding system of this category is defined.  */
7541                     detect_info.rejected |= (1 << category);
7542                   }
7543                 else if (category >= coding_category_raw_text)
7544                   continue;
7545                 else if (detect_info.checked & (1 << category))
7546                   {
7547                     if (highest
7548                         && (detect_info.found & (1 << category)))
7549                       break;
7550                   }
7551                 else
7552                   {
7553                     if ((*(this->detector)) (&coding, &detect_info)
7554                         && highest
7555                         && (detect_info.found & (1 << category)))
7556                       {
7557                         if (category == coding_category_utf_16_auto)
7558                           {
7559                             if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7560                               category = coding_category_utf_16_le;
7561                             else
7562                               category = coding_category_utf_16_be;
7563                           }
7564                         break;
7565                       }
7566                   }
7567               }
7568         }
7569
7570       if (detect_info.rejected == CATEGORY_MASK_ANY)
7571         {
7572           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7573           id = coding_categories[coding_category_raw_text].id;
7574           val = Fcons (make_number (id), Qnil);
7575         }
7576       else if (! detect_info.rejected && ! detect_info.found)
7577         {
7578           detect_info.found = CATEGORY_MASK_ANY;
7579           id = coding_categories[coding_category_undecided].id;
7580           val = Fcons (make_number (id), Qnil);
7581         }
7582       else if (highest)
7583         {
7584           if (detect_info.found)
7585             {
7586               detect_info.found = 1 << category;
7587               val = Fcons (make_number (this->id), Qnil);
7588             }
7589           else
7590             for (i = 0; i < coding_category_raw_text; i++)
7591               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7592                 {
7593                   detect_info.found = 1 << coding_priorities[i];
7594                   id = coding_categories[coding_priorities[i]].id;
7595                   val = Fcons (make_number (id), Qnil);
7596                   break;
7597                 }
7598         }
7599       else
7600         {
7601           int mask = detect_info.rejected | detect_info.found;
7602           int found = 0;
7603           val = Qnil;
7604
7605           for (i = coding_category_raw_text - 1; i >= 0; i--)
7606             {
7607               category = coding_priorities[i];
7608               if (! (mask & (1 << category)))
7609                 {
7610                   found |= 1 << category;
7611                   id = coding_categories[category].id;
7612                   if (id >= 0)
7613                     val = Fcons (make_number (id), val);
7614                 }
7615             }
7616           for (i = coding_category_raw_text - 1; i >= 0; i--)
7617             {
7618               category = coding_priorities[i];
7619               if (detect_info.found & (1 << category))
7620                 {
7621                   id = coding_categories[category].id;
7622                   val = Fcons (make_number (id), val);
7623                 }
7624             }
7625           detect_info.found |= found;
7626         }
7627     }
7628   else if (base_category == coding_category_utf_16_auto)
7629     {
7630       if (detect_coding_utf_16 (&coding, &detect_info))
7631         {
7632           struct coding_system *this;
7633
7634           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7635             this = coding_categories + coding_category_utf_16_le;
7636           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7637             this = coding_categories + coding_category_utf_16_be;
7638           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7639             this = coding_categories + coding_category_utf_16_be_nosig;
7640           else
7641             this = coding_categories + coding_category_utf_16_le_nosig;
7642           val = Fcons (make_number (this->id), Qnil);
7643         }
7644     }
7645   else
7646     {
7647       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7648       val = Fcons (make_number (coding.id), Qnil);
7649     }
7650
7651   /* Then, detect eol-format if necessary.  */
7652   {
7653     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7654     Lisp_Object tail;
7655
7656     if (VECTORP (eol_type))
7657       {
7658         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7659           normal_eol = detect_eol (coding.source, src_bytes,
7660                                    coding_category_raw_text);
7661         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7662                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7663           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7664                                       coding_category_utf_16_be);
7665         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7666                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7667           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7668                                       coding_category_utf_16_le);
7669       }
7670     else
7671       {
7672         if (EQ (eol_type, Qunix))
7673           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7674         else if (EQ (eol_type, Qdos))
7675           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7676         else
7677           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7678       }
7679
7680     for (tail = val; CONSP (tail); tail = XCDR (tail))
7681       {
7682         enum coding_category category;
7683         int this_eol;
7684
7685         id = XINT (XCAR (tail));
7686         attrs = CODING_ID_ATTRS (id);
7687         category = XINT (CODING_ATTR_CATEGORY (attrs));
7688         eol_type = CODING_ID_EOL_TYPE (id);
7689         if (VECTORP (eol_type))
7690           {
7691             if (category == coding_category_utf_16_be
7692                 || category == coding_category_utf_16_be_nosig)
7693               this_eol = utf_16_be_eol;
7694             else if (category == coding_category_utf_16_le
7695                      || category == coding_category_utf_16_le_nosig)
7696               this_eol = utf_16_le_eol;
7697             else
7698               this_eol = normal_eol;
7699
7700             if (this_eol == EOL_SEEN_LF)
7701               XSETCAR (tail, AREF (eol_type, 0));
7702             else if (this_eol == EOL_SEEN_CRLF)
7703               XSETCAR (tail, AREF (eol_type, 1));
7704             else if (this_eol == EOL_SEEN_CR)
7705               XSETCAR (tail, AREF (eol_type, 2));
7706             else
7707               XSETCAR (tail, CODING_ID_NAME (id));
7708           }
7709         else
7710           XSETCAR (tail, CODING_ID_NAME (id));
7711       }
7712   }
7713
7714   return (highest ? XCAR (val) : val);
7715 }
7716
7717
7718 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7719        2, 3, 0,
7720        doc: /* Detect coding system of the text in the region between START and END.
7721 Return a list of possible coding systems ordered by priority.
7722
7723 If only ASCII characters are found (except for such ISO-2022 control
7724 characters ISO-2022 as ESC), it returns a list of single element
7725 `undecided' or its subsidiary coding system according to a detected
7726 end-of-line format.
7727
7728 If optional argument HIGHEST is non-nil, return the coding system of
7729 highest priority.  */)
7730      (start, end, highest)
7731      Lisp_Object start, end, highest;
7732 {
7733   int from, to;
7734   int from_byte, to_byte;
7735
7736   CHECK_NUMBER_COERCE_MARKER (start);
7737   CHECK_NUMBER_COERCE_MARKER (end);
7738
7739   validate_region (&start, &end);
7740   from = XINT (start), to = XINT (end);
7741   from_byte = CHAR_TO_BYTE (from);
7742   to_byte = CHAR_TO_BYTE (to);
7743
7744   if (from < GPT && to >= GPT)
7745     move_gap_both (to, to_byte);
7746
7747   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7748                                to - from, to_byte - from_byte,
7749                                !NILP (highest),
7750                                !NILP (current_buffer
7751                                       ->enable_multibyte_characters),
7752                                Qnil);
7753 }
7754
7755 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7756        1, 2, 0,
7757        doc: /* Detect coding system of the text in STRING.
7758 Return a list of possible coding systems ordered by priority.
7759
7760 If only ASCII characters are found (except for such ISO-2022 control
7761 characters ISO-2022 as ESC), it returns a list of single element
7762 `undecided' or its subsidiary coding system according to a detected
7763 end-of-line format.
7764
7765 If optional argument HIGHEST is non-nil, return the coding system of
7766 highest priority.  */)
7767      (string, highest)
7768      Lisp_Object string, highest;
7769 {
7770   CHECK_STRING (string);
7771
7772   return detect_coding_system (SDATA (string),
7773                                SCHARS (string), SBYTES (string),
7774                                !NILP (highest), STRING_MULTIBYTE (string),
7775                                Qnil);
7776 }
7777
7778
7779 static INLINE int
7780 char_encodable_p (c, attrs)
7781      int c;
7782      Lisp_Object attrs;
7783 {
7784   Lisp_Object tail;
7785   struct charset *charset;
7786   Lisp_Object translation_table;
7787
7788   translation_table = CODING_ATTR_TRANS_TBL (attrs);
7789   if (! NILP (translation_table))
7790     c = translate_char (translation_table, c);
7791   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7792        CONSP (tail); tail = XCDR (tail))
7793     {
7794       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7795       if (CHAR_CHARSET_P (c, charset))
7796         break;
7797     }
7798   return (! NILP (tail));
7799 }
7800
7801
7802 /* Return a list of coding systems that safely encode the text between
7803    START and END.  If EXCLUDE is non-nil, it is a list of coding
7804    systems not to check.  The returned list doesn't contain any such
7805    coding systems.  In any case, if the text contains only ASCII or is
7806    unibyte, return t.  */
7807
7808 DEFUN ("find-coding-systems-region-internal",
7809        Ffind_coding_systems_region_internal,
7810        Sfind_coding_systems_region_internal, 2, 3, 0,
7811        doc: /* Internal use only.  */)
7812      (start, end, exclude)
7813      Lisp_Object start, end, exclude;
7814 {
7815   Lisp_Object coding_attrs_list, safe_codings;
7816   EMACS_INT start_byte, end_byte;
7817   const unsigned char *p, *pbeg, *pend;
7818   int c;
7819   Lisp_Object tail, elt;
7820
7821   if (STRINGP (start))
7822     {
7823       if (!STRING_MULTIBYTE (start)
7824           || SCHARS (start) == SBYTES (start))
7825         return Qt;
7826       start_byte = 0;
7827       end_byte = SBYTES (start);
7828     }
7829   else
7830     {
7831       CHECK_NUMBER_COERCE_MARKER (start);
7832       CHECK_NUMBER_COERCE_MARKER (end);
7833       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7834         args_out_of_range (start, end);
7835       if (NILP (current_buffer->enable_multibyte_characters))
7836         return Qt;
7837       start_byte = CHAR_TO_BYTE (XINT (start));
7838       end_byte = CHAR_TO_BYTE (XINT (end));
7839       if (XINT (end) - XINT (start) == end_byte - start_byte)
7840         return Qt;
7841
7842       if (XINT (start) < GPT && XINT (end) > GPT)
7843         {
7844           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7845             move_gap_both (XINT (start), start_byte);
7846           else
7847             move_gap_both (XINT (end), end_byte);
7848         }
7849     }
7850
7851   coding_attrs_list = Qnil;
7852   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7853     if (NILP (exclude)
7854         || NILP (Fmemq (XCAR (tail), exclude)))
7855       {
7856         Lisp_Object attrs;
7857
7858         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7859         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7860             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7861           {
7862             ASET (attrs, coding_attr_trans_tbl,
7863                   get_translation_table (attrs, 1, NULL));
7864             coding_attrs_list = Fcons (attrs, coding_attrs_list);
7865           }
7866       }
7867
7868   if (STRINGP (start))
7869     p = pbeg = SDATA (start);
7870   else
7871     p = pbeg = BYTE_POS_ADDR (start_byte);
7872   pend = p + (end_byte - start_byte);
7873
7874   while (p < pend && ASCII_BYTE_P (*p)) p++;
7875   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7876
7877   while (p < pend)
7878     {
7879       if (ASCII_BYTE_P (*p))
7880         p++;
7881       else
7882         {
7883           c = STRING_CHAR_ADVANCE (p);
7884
7885           charset_map_loaded = 0;
7886           for (tail = coding_attrs_list; CONSP (tail);)
7887             {
7888               elt = XCAR (tail);
7889               if (NILP (elt))
7890                 tail = XCDR (tail);
7891               else if (char_encodable_p (c, elt))
7892                 tail = XCDR (tail);
7893               else if (CONSP (XCDR (tail)))
7894                 {
7895                   XSETCAR (tail, XCAR (XCDR (tail)));
7896                   XSETCDR (tail, XCDR (XCDR (tail)));
7897                 }
7898               else
7899                 {
7900                   XSETCAR (tail, Qnil);
7901                   tail = XCDR (tail);
7902                 }
7903             }
7904           if (charset_map_loaded)
7905             {
7906               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7907
7908               if (STRINGP (start))
7909                 pbeg = SDATA (start);
7910               else
7911                 pbeg = BYTE_POS_ADDR (start_byte);
7912               p = pbeg + p_offset;
7913               pend = pbeg + pend_offset;
7914             }
7915         }
7916     }
7917
7918   safe_codings = list2 (Qraw_text, Qno_conversion);
7919   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7920     if (! NILP (XCAR (tail)))
7921       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7922
7923   return safe_codings;
7924 }
7925
7926
7927 DEFUN ("unencodable-char-position", Funencodable_char_position,
7928        Sunencodable_char_position, 3, 5, 0,
7929        doc: /*
7930 Return position of first un-encodable character in a region.
7931 START and END specfiy the region and CODING-SYSTEM specifies the
7932 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7933
7934 If optional 4th argument COUNT is non-nil, it specifies at most how
7935 many un-encodable characters to search.  In this case, the value is a
7936 list of positions.
7937
7938 If optional 5th argument STRING is non-nil, it is a string to search
7939 for un-encodable characters.  In that case, START and END are indexes
7940 to the string.  */)
7941      (start, end, coding_system, count, string)
7942      Lisp_Object start, end, coding_system, count, string;
7943 {
7944   int n;
7945   struct coding_system coding;
7946   Lisp_Object attrs, charset_list, translation_table;
7947   Lisp_Object positions;
7948   int from, to;
7949   const unsigned char *p, *stop, *pend;
7950   int ascii_compatible;
7951
7952   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7953   attrs = CODING_ID_ATTRS (coding.id);
7954   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7955     return Qnil;
7956   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7957   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7958   translation_table = get_translation_table (attrs, 1, NULL);
7959
7960   if (NILP (string))
7961     {
7962       validate_region (&start, &end);
7963       from = XINT (start);
7964       to = XINT (end);
7965       if (NILP (current_buffer->enable_multibyte_characters)
7966           || (ascii_compatible
7967               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7968         return Qnil;
7969       p = CHAR_POS_ADDR (from);
7970       pend = CHAR_POS_ADDR (to);
7971       if (from < GPT && to >= GPT)
7972         stop = GPT_ADDR;
7973       else
7974         stop = pend;
7975     }
7976   else
7977     {
7978       CHECK_STRING (string);
7979       CHECK_NATNUM (start);
7980       CHECK_NATNUM (end);
7981       from = XINT (start);
7982       to = XINT (end);
7983       if (from > to
7984           || to > SCHARS (string))
7985         args_out_of_range_3 (string, start, end);
7986       if (! STRING_MULTIBYTE (string))
7987         return Qnil;
7988       p = SDATA (string) + string_char_to_byte (string, from);
7989       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7990       if (ascii_compatible && (to - from) == (pend - p))
7991         return Qnil;
7992     }
7993
7994   if (NILP (count))
7995     n = 1;
7996   else
7997     {
7998       CHECK_NATNUM (count);
7999       n = XINT (count);
8000     }
8001
8002   positions = Qnil;
8003   while (1)
8004     {
8005       int c;
8006
8007       if (ascii_compatible)
8008         while (p < stop && ASCII_BYTE_P (*p))
8009           p++, from++;
8010       if (p >= stop)
8011         {
8012           if (p >= pend)
8013             break;
8014           stop = pend;
8015           p = GAP_END_ADDR;
8016         }
8017
8018       c = STRING_CHAR_ADVANCE (p);
8019       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8020           && ! char_charset (translate_char (translation_table, c),
8021                              charset_list, NULL))
8022         {
8023           positions = Fcons (make_number (from), positions);
8024           n--;
8025           if (n == 0)
8026             break;
8027         }
8028
8029       from++;
8030     }
8031
8032   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8033 }
8034
8035
8036 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8037        Scheck_coding_systems_region, 3, 3, 0,
8038        doc: /* Check if the region is encodable by coding systems.
8039
8040 START and END are buffer positions specifying the region.
8041 CODING-SYSTEM-LIST is a list of coding systems to check.
8042
8043 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8044 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
8045 whole region, POS0, POS1, ... are buffer positions where non-encodable
8046 characters are found.
8047
8048 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8049 value is nil.
8050
8051 START may be a string.  In that case, check if the string is
8052 encodable, and the value contains indices to the string instead of
8053 buffer positions.  END is ignored.  */)
8054      (start, end, coding_system_list)
8055      Lisp_Object start, end, coding_system_list;
8056 {
8057   Lisp_Object list;
8058   EMACS_INT start_byte, end_byte;
8059   int pos;
8060   const unsigned char *p, *pbeg, *pend;
8061   int c;
8062   Lisp_Object tail, elt, attrs;
8063
8064   if (STRINGP (start))
8065     {
8066       if (!STRING_MULTIBYTE (start)
8067           && SCHARS (start) != SBYTES (start))
8068         return Qnil;
8069       start_byte = 0;
8070       end_byte = SBYTES (start);
8071       pos = 0;
8072     }
8073   else
8074     {
8075       CHECK_NUMBER_COERCE_MARKER (start);
8076       CHECK_NUMBER_COERCE_MARKER (end);
8077       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8078         args_out_of_range (start, end);
8079       if (NILP (current_buffer->enable_multibyte_characters))
8080         return Qnil;
8081       start_byte = CHAR_TO_BYTE (XINT (start));
8082       end_byte = CHAR_TO_BYTE (XINT (end));
8083       if (XINT (end) - XINT (start) == end_byte - start_byte)
8084         return Qt;
8085
8086       if (XINT (start) < GPT && XINT (end) > GPT)
8087         {
8088           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8089             move_gap_both (XINT (start), start_byte);
8090           else
8091             move_gap_both (XINT (end), end_byte);
8092         }
8093       pos = XINT (start);
8094     }
8095
8096   list = Qnil;
8097   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8098     {
8099       elt = XCAR (tail);
8100       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8101       ASET (attrs, coding_attr_trans_tbl,
8102             get_translation_table (attrs, 1, NULL));
8103       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8104     }
8105
8106   if (STRINGP (start))
8107     p = pbeg = SDATA (start);
8108   else
8109     p = pbeg = BYTE_POS_ADDR (start_byte);
8110   pend = p + (end_byte - start_byte);
8111
8112   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8113   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8114
8115   while (p < pend)
8116     {
8117       if (ASCII_BYTE_P (*p))
8118         p++;
8119       else
8120         {
8121           c = STRING_CHAR_ADVANCE (p);
8122
8123           charset_map_loaded = 0;
8124           for (tail = list; CONSP (tail); tail = XCDR (tail))
8125             {
8126               elt = XCDR (XCAR (tail));
8127               if (! char_encodable_p (c, XCAR (elt)))
8128                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8129             }
8130           if (charset_map_loaded)
8131             {
8132               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8133
8134               if (STRINGP (start))
8135                 pbeg = SDATA (start);
8136               else
8137                 pbeg = BYTE_POS_ADDR (start_byte);
8138               p = pbeg + p_offset;
8139               pend = pbeg + pend_offset;
8140             }
8141         }
8142       pos++;
8143     }
8144
8145   tail = list;
8146   list = Qnil;
8147   for (; CONSP (tail); tail = XCDR (tail))
8148     {
8149       elt = XCAR (tail);
8150       if (CONSP (XCDR (XCDR (elt))))
8151         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8152                       list);
8153     }
8154
8155   return list;
8156 }
8157
8158
8159 Lisp_Object
8160 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8161      Lisp_Object start, end, coding_system, dst_object;
8162      int encodep, norecord;
8163 {
8164   struct coding_system coding;
8165   EMACS_INT from, from_byte, to, to_byte;
8166   Lisp_Object src_object;
8167
8168   CHECK_NUMBER_COERCE_MARKER (start);
8169   CHECK_NUMBER_COERCE_MARKER (end);
8170   if (NILP (coding_system))
8171     coding_system = Qno_conversion;
8172   else
8173     CHECK_CODING_SYSTEM (coding_system);
8174   src_object = Fcurrent_buffer ();
8175   if (NILP (dst_object))
8176     dst_object = src_object;
8177   else if (! EQ (dst_object, Qt))
8178     CHECK_BUFFER (dst_object);
8179
8180   validate_region (&start, &end);
8181   from = XFASTINT (start);
8182   from_byte = CHAR_TO_BYTE (from);
8183   to = XFASTINT (end);
8184   to_byte = CHAR_TO_BYTE (to);
8185
8186   setup_coding_system (coding_system, &coding);
8187   coding.mode |= CODING_MODE_LAST_BLOCK;
8188
8189   if (encodep)
8190     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8191                           dst_object);
8192   else
8193     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8194                           dst_object);
8195   if (! norecord)
8196     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8197
8198   return (BUFFERP (dst_object)
8199           ? make_number (coding.produced_char)
8200           : coding.dst_object);
8201 }
8202
8203
8204 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8205        3, 4, "r\nzCoding system: ",
8206        doc: /* Decode the current region from the specified coding system.
8207 When called from a program, takes four arguments:
8208         START, END, CODING-SYSTEM, and DESTINATION.
8209 START and END are buffer positions.
8210
8211 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8212 If nil, the region between START and END is replaced by the decoded text.
8213 If buffer, the decoded text is inserted in the buffer.
8214 If t, the decoded text is returned.
8215
8216 This function sets `last-coding-system-used' to the precise coding system
8217 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8218 not fully specified.)
8219 It returns the length of the decoded text.  */)
8220      (start, end, coding_system, destination)
8221      Lisp_Object start, end, coding_system, destination;
8222 {
8223   return code_convert_region (start, end, coding_system, destination, 0, 0);
8224 }
8225
8226 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8227        3, 4, "r\nzCoding system: ",
8228        doc: /* Encode the current region by specified coding system.
8229 When called from a program, takes three arguments:
8230 START, END, and CODING-SYSTEM.  START and END are buffer positions.
8231
8232 Optional 4th arguments DESTINATION specifies where the encoded text goes.
8233 If nil, the region between START and END is replace by the encoded text.
8234 If buffer, the encoded text is inserted in the buffer.
8235 If t, the encoded text is returned.
8236
8237 This function sets `last-coding-system-used' to the precise coding system
8238 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8239 not fully specified.)
8240 It returns the length of the encoded text.  */)
8241   (start, end, coding_system, destination)
8242      Lisp_Object start, end, coding_system, destination;
8243 {
8244   return code_convert_region (start, end, coding_system, destination, 1, 0);
8245 }
8246
8247 Lisp_Object
8248 code_convert_string (string, coding_system, dst_object,
8249                      encodep, nocopy, norecord)
8250      Lisp_Object string, coding_system, dst_object;
8251      int encodep, nocopy, norecord;
8252 {
8253   struct coding_system coding;
8254   EMACS_INT chars, bytes;
8255
8256   CHECK_STRING (string);
8257   if (NILP (coding_system))
8258     {
8259       if (! norecord)
8260         Vlast_coding_system_used = Qno_conversion;
8261       if (NILP (dst_object))
8262         return (nocopy ? Fcopy_sequence (string) : string);
8263     }
8264
8265   if (NILP (coding_system))
8266     coding_system = Qno_conversion;
8267   else
8268     CHECK_CODING_SYSTEM (coding_system);
8269   if (NILP (dst_object))
8270     dst_object = Qt;
8271   else if (! EQ (dst_object, Qt))
8272     CHECK_BUFFER (dst_object);
8273
8274   setup_coding_system (coding_system, &coding);
8275   coding.mode |= CODING_MODE_LAST_BLOCK;
8276   chars = SCHARS (string);
8277   bytes = SBYTES (string);
8278   if (encodep)
8279     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8280   else
8281     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8282   if (! norecord)
8283     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8284
8285   return (BUFFERP (dst_object)
8286           ? make_number (coding.produced_char)
8287           : coding.dst_object);
8288 }
8289
8290
8291 /* Encode or decode STRING according to CODING_SYSTEM.
8292    Do not set Vlast_coding_system_used.
8293
8294    This function is called only from macros DECODE_FILE and
8295    ENCODE_FILE, thus we ignore character composition.  */
8296
8297 Lisp_Object
8298 code_convert_string_norecord (string, coding_system, encodep)
8299      Lisp_Object string, coding_system;
8300      int encodep;
8301 {
8302   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
8303 }
8304
8305
8306 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8307        2, 4, 0,
8308        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8309
8310 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8311 if the decoding operation is trivial.
8312
8313 Optional fourth arg BUFFER non-nil meant that the decoded text is
8314 inserted in BUFFER instead of returned as a string.  In this case,
8315 the return value is BUFFER.
8316
8317 This function sets `last-coding-system-used' to the precise coding system
8318 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8319 not fully specified.  */)
8320   (string, coding_system, nocopy, buffer)
8321      Lisp_Object string, coding_system, nocopy, buffer;
8322 {
8323   return code_convert_string (string, coding_system, buffer,
8324                               0, ! NILP (nocopy), 0);
8325 }
8326
8327 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8328        2, 4, 0,
8329        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8330
8331 Optional third arg NOCOPY non-nil means it is OK to return STRING
8332 itself if the encoding operation is trivial.
8333
8334 Optional fourth arg BUFFER non-nil meant that the encoded text is
8335 inserted in BUFFER instead of returned as a string.  In this case,
8336 the return value is BUFFER.
8337
8338 This function sets `last-coding-system-used' to the precise coding system
8339 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8340 not fully specified.)  */)
8341      (string, coding_system, nocopy, buffer)
8342      Lisp_Object string, coding_system, nocopy, buffer;
8343 {
8344   return code_convert_string (string, coding_system, buffer,
8345                               1, ! NILP (nocopy), 1);
8346 }
8347
8348 \f
8349 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
8350        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8351 Return the corresponding character.  */)
8352      (code)
8353      Lisp_Object code;
8354 {
8355   Lisp_Object spec, attrs, val;
8356   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8357   int c;
8358
8359   CHECK_NATNUM (code);
8360   c = XFASTINT (code);
8361   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8362   attrs = AREF (spec, 0);
8363
8364   if (ASCII_BYTE_P (c)
8365       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8366     return code;
8367
8368   val = CODING_ATTR_CHARSET_LIST (attrs);
8369   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8370   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8371   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
8372
8373   if (c <= 0x7F)
8374     charset = charset_roman;
8375   else if (c >= 0xA0 && c < 0xDF)
8376     {
8377       charset = charset_kana;
8378       c -= 0x80;
8379     }
8380   else
8381     {
8382       int s1 = c >> 8, s2 = c & 0xFF;
8383
8384       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8385           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8386         error ("Invalid code: %d", code);
8387       SJIS_TO_JIS (c);
8388       charset = charset_kanji;
8389     }
8390   c = DECODE_CHAR (charset, c);
8391   if (c < 0)
8392     error ("Invalid code: %d", code);
8393   return make_number (c);
8394 }
8395
8396
8397 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8398        doc: /* Encode a Japanese character CH to shift_jis encoding.
8399 Return the corresponding code in SJIS.  */)
8400      (ch)
8401     Lisp_Object ch;
8402 {
8403   Lisp_Object spec, attrs, charset_list;
8404   int c;
8405   struct charset *charset;
8406   unsigned code;
8407
8408   CHECK_CHARACTER (ch);
8409   c = XFASTINT (ch);
8410   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8411   attrs = AREF (spec, 0);
8412
8413   if (ASCII_CHAR_P (c)
8414       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8415     return ch;
8416
8417   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8418   charset = char_charset (c, charset_list, &code);
8419   if (code == CHARSET_INVALID_CODE (charset))
8420     error ("Can't encode by shift_jis encoding: %d", c);
8421   JIS_TO_SJIS (code);
8422
8423   return make_number (code);
8424 }
8425
8426 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8427        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8428 Return the corresponding character.  */)
8429      (code)
8430      Lisp_Object code;
8431 {
8432   Lisp_Object spec, attrs, val;
8433   struct charset *charset_roman, *charset_big5, *charset;
8434   int c;
8435
8436   CHECK_NATNUM (code);
8437   c = XFASTINT (code);
8438   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8439   attrs = AREF (spec, 0);
8440
8441   if (ASCII_BYTE_P (c)
8442       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8443     return code;
8444
8445   val = CODING_ATTR_CHARSET_LIST (attrs);
8446   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8447   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8448
8449   if (c <= 0x7F)
8450     charset = charset_roman;
8451   else
8452     {
8453       int b1 = c >> 8, b2 = c & 0x7F;
8454       if (b1 < 0xA1 || b1 > 0xFE
8455           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8456         error ("Invalid code: %d", code);
8457       charset = charset_big5;
8458     }
8459   c = DECODE_CHAR (charset, (unsigned )c);
8460   if (c < 0)
8461     error ("Invalid code: %d", code);
8462   return make_number (c);
8463 }
8464
8465 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8466        doc: /* Encode the Big5 character CH to BIG5 coding system.
8467 Return the corresponding character code in Big5.  */)
8468      (ch)
8469      Lisp_Object ch;
8470 {
8471   Lisp_Object spec, attrs, charset_list;
8472   struct charset *charset;
8473   int c;
8474   unsigned code;
8475
8476   CHECK_CHARACTER (ch);
8477   c = XFASTINT (ch);
8478   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8479   attrs = AREF (spec, 0);
8480   if (ASCII_CHAR_P (c)
8481       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8482     return ch;
8483
8484   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8485   charset = char_charset (c, charset_list, &code);
8486   if (code == CHARSET_INVALID_CODE (charset))
8487     error ("Can't encode by Big5 encoding: %d", c);
8488
8489   return make_number (code);
8490 }
8491
8492 \f
8493 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
8494        Sset_terminal_coding_system_internal, 1, 2, 0,
8495        doc: /* Internal use only.  */)
8496      (coding_system, terminal)
8497      Lisp_Object coding_system;
8498      Lisp_Object terminal;
8499 {
8500   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8501   CHECK_SYMBOL (coding_system);
8502   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
8503   /* We had better not send unsafe characters to terminal.  */
8504   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
8505   /* Characer composition should be disabled.  */
8506   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8507   terminal_coding->src_multibyte = 1;
8508   terminal_coding->dst_multibyte = 0;
8509   return Qnil;
8510 }
8511
8512 DEFUN ("set-safe-terminal-coding-system-internal",
8513        Fset_safe_terminal_coding_system_internal,
8514        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8515        doc: /* Internal use only.  */)
8516      (coding_system)
8517      Lisp_Object coding_system;
8518 {
8519   CHECK_SYMBOL (coding_system);
8520   setup_coding_system (Fcheck_coding_system (coding_system),
8521                        &safe_terminal_coding);
8522   /* Characer composition should be disabled.  */
8523   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8524   safe_terminal_coding.src_multibyte = 1;
8525   safe_terminal_coding.dst_multibyte = 0;
8526   return Qnil;
8527 }
8528
8529 DEFUN ("terminal-coding-system", Fterminal_coding_system,
8530        Sterminal_coding_system, 0, 1, 0,
8531        doc: /* Return coding system specified for terminal output on the given terminal.
8532 TERMINAL may be a terminal id, a frame, or nil for the selected
8533 frame's terminal device.  */)
8534      (terminal)
8535      Lisp_Object terminal;
8536 {
8537   struct coding_system *terminal_coding
8538     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8539   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
8540
8541   /* For backward compatibility, return nil if it is `undecided'. */
8542   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
8543 }
8544
8545 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
8546        Sset_keyboard_coding_system_internal, 1, 2, 0,
8547        doc: /* Internal use only.  */)
8548      (coding_system, terminal)
8549      Lisp_Object coding_system;
8550      Lisp_Object terminal;
8551 {
8552   struct terminal *t = get_terminal (terminal, 1);
8553   CHECK_SYMBOL (coding_system);
8554   setup_coding_system (Fcheck_coding_system (coding_system),
8555                        TERMINAL_KEYBOARD_CODING (t));
8556   /* Characer composition should be disabled.  */
8557   TERMINAL_KEYBOARD_CODING (t)->common_flags
8558     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8559   return Qnil;
8560 }
8561
8562 DEFUN ("keyboard-coding-system",
8563        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
8564        doc: /* Return coding system specified for decoding keyboard input.  */)
8565      (terminal)
8566      Lisp_Object terminal;
8567 {
8568   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8569                          (get_terminal (terminal, 1))->id);
8570 }
8571
8572 \f
8573 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8574        Sfind_operation_coding_system,  1, MANY, 0,
8575        doc: /* Choose a coding system for an operation based on the target name.
8576 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8577 DECODING-SYSTEM is the coding system to use for decoding
8578 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8579 for encoding (in case OPERATION does encoding).
8580
8581 The first argument OPERATION specifies an I/O primitive:
8582   For file I/O, `insert-file-contents' or `write-region'.
8583   For process I/O, `call-process', `call-process-region', or `start-process'.
8584   For network I/O, `open-network-stream'.
8585
8586 The remaining arguments should be the same arguments that were passed
8587 to the primitive.  Depending on which primitive, one of those arguments
8588 is selected as the TARGET.  For example, if OPERATION does file I/O,
8589 whichever argument specifies the file name is TARGET.
8590
8591 TARGET has a meaning which depends on OPERATION:
8592   For file I/O, TARGET is a file name (except for the special case below).
8593   For process I/O, TARGET is a process name.
8594   For network I/O, TARGET is a service name or a port number
8595
8596 This function looks up what specified for TARGET in,
8597 `file-coding-system-alist', `process-coding-system-alist',
8598 or `network-coding-system-alist' depending on OPERATION.
8599 They may specify a coding system, a cons of coding systems,
8600 or a function symbol to call.
8601 In the last case, we call the function with one argument,
8602 which is a list of all the arguments given to this function.
8603 If the function can't decide a coding system, it can return
8604 `undecided' so that the normal code-detection is performed.
8605
8606 If OPERATION is `insert-file-contents', the argument corresponding to
8607 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
8608 file name to look up, and BUFFER is a buffer that contains the file's
8609 contents (not yet decoded).  If `file-coding-system-alist' specifies a
8610 function to call for FILENAME, that function should examine the
8611 contents of BUFFER instead of reading the file.
8612
8613 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
8614      (nargs, args)
8615      int nargs;
8616      Lisp_Object *args;
8617 {
8618   Lisp_Object operation, target_idx, target, val;
8619   register Lisp_Object chain;
8620
8621   if (nargs < 2)
8622     error ("Too few arguments");
8623   operation = args[0];
8624   if (!SYMBOLP (operation)
8625       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8626     error ("Invalid first arguement");
8627   if (nargs < 1 + XINT (target_idx))
8628     error ("Too few arguments for operation: %s",
8629            SDATA (SYMBOL_NAME (operation)));
8630   target = args[XINT (target_idx) + 1];
8631   if (!(STRINGP (target)
8632         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8633             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
8634         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8635     error ("Invalid %dth argument", XINT (target_idx) + 1);
8636   if (CONSP (target))
8637     target = XCAR (target);
8638
8639   chain = ((EQ (operation, Qinsert_file_contents)
8640             || EQ (operation, Qwrite_region))
8641            ? Vfile_coding_system_alist
8642            : (EQ (operation, Qopen_network_stream)
8643               ? Vnetwork_coding_system_alist
8644               : Vprocess_coding_system_alist));
8645   if (NILP (chain))
8646     return Qnil;
8647
8648   for (; CONSP (chain); chain = XCDR (chain))
8649     {
8650       Lisp_Object elt;
8651
8652       elt = XCAR (chain);
8653       if (CONSP (elt)
8654           && ((STRINGP (target)
8655                && STRINGP (XCAR (elt))
8656                && fast_string_match (XCAR (elt), target) >= 0)
8657               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8658         {
8659           val = XCDR (elt);
8660           /* Here, if VAL is both a valid coding system and a valid
8661              function symbol, we return VAL as a coding system.  */
8662           if (CONSP (val))
8663             return val;
8664           if (! SYMBOLP (val))
8665             return Qnil;
8666           if (! NILP (Fcoding_system_p (val)))
8667             return Fcons (val, val);
8668           if (! NILP (Ffboundp (val)))
8669             {
8670               /* We use call1 rather than safe_call1
8671                  so as to get bug reports about functions called here
8672                  which don't handle the current interface.  */
8673               val = call1 (val, Flist (nargs, args));
8674               if (CONSP (val))
8675                 return val;
8676               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8677                 return Fcons (val, val);
8678             }
8679           return Qnil;
8680         }
8681     }
8682   return Qnil;
8683 }
8684
8685 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8686        Sset_coding_system_priority, 0, MANY, 0,
8687        doc: /* Assign higher priority to the coding systems given as arguments.
8688 If multiple coding systems belongs to the same category,
8689 all but the first one are ignored.
8690
8691 usage: (set-coding-system-priority ...)  */)
8692      (nargs, args)
8693      int nargs;
8694      Lisp_Object *args;
8695 {
8696   int i, j;
8697   int changed[coding_category_max];
8698   enum coding_category priorities[coding_category_max];
8699
8700   bzero (changed, sizeof changed);
8701
8702   for (i = j = 0; i < nargs; i++)
8703     {
8704       enum coding_category category;
8705       Lisp_Object spec, attrs;
8706
8707       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8708       attrs = AREF (spec, 0);
8709       category = XINT (CODING_ATTR_CATEGORY (attrs));
8710       if (changed[category])
8711         /* Ignore this coding system because a coding system of the
8712            same category already had a higher priority.  */
8713         continue;
8714       changed[category] = 1;
8715       priorities[j++] = category;
8716       if (coding_categories[category].id >= 0
8717           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8718         setup_coding_system (args[i], &coding_categories[category]);
8719       Fset (AREF (Vcoding_category_table, category), args[i]);
8720     }
8721
8722   /* Now we have decided top J priorities.  Reflect the order of the
8723      original priorities to the remaining priorities.  */
8724
8725   for (i = j, j = 0; i < coding_category_max; i++, j++)
8726     {
8727       while (j < coding_category_max
8728              && changed[coding_priorities[j]])
8729         j++;
8730       if (j == coding_category_max)
8731         abort ();
8732       priorities[i] = coding_priorities[j];
8733     }
8734
8735   bcopy (priorities, coding_priorities, sizeof priorities);
8736
8737   /* Update `coding-category-list'.  */
8738   Vcoding_category_list = Qnil;
8739   for (i = coding_category_max - 1; i >= 0; i--)
8740     Vcoding_category_list
8741       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8742                Vcoding_category_list);
8743
8744   return Qnil;
8745 }
8746
8747 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8748        Scoding_system_priority_list, 0, 1, 0,
8749        doc: /* Return a list of coding systems ordered by their priorities.
8750 HIGHESTP non-nil means just return the highest priority one.  */)
8751      (highestp)
8752      Lisp_Object highestp;
8753 {
8754   int i;
8755   Lisp_Object val;
8756
8757   for (i = 0, val = Qnil; i < coding_category_max; i++)
8758     {
8759       enum coding_category category = coding_priorities[i];
8760       int id = coding_categories[category].id;
8761       Lisp_Object attrs;
8762
8763       if (id < 0)
8764         continue;
8765       attrs = CODING_ID_ATTRS (id);
8766       if (! NILP (highestp))
8767         return CODING_ATTR_BASE_NAME (attrs);
8768       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8769     }
8770   return Fnreverse (val);
8771 }
8772
8773 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8774
8775 static Lisp_Object
8776 make_subsidiaries (base)
8777      Lisp_Object base;
8778 {
8779   Lisp_Object subsidiaries;
8780   int base_name_len = SBYTES (SYMBOL_NAME (base));
8781   char *buf = (char *) alloca (base_name_len + 6);
8782   int i;
8783
8784   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
8785   subsidiaries = Fmake_vector (make_number (3), Qnil);
8786   for (i = 0; i < 3; i++)
8787     {
8788       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8789       ASET (subsidiaries, i, intern (buf));
8790     }
8791   return subsidiaries;
8792 }
8793
8794
8795 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8796        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
8797        doc: /* For internal use only.
8798 usage: (define-coding-system-internal ...)  */)
8799      (nargs, args)
8800      int nargs;
8801      Lisp_Object *args;
8802 {
8803   Lisp_Object name;
8804   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
8805   Lisp_Object attrs;            /* Vector of attributes.  */
8806   Lisp_Object eol_type;
8807   Lisp_Object aliases;
8808   Lisp_Object coding_type, charset_list, safe_charsets;
8809   enum coding_category category;
8810   Lisp_Object tail, val;
8811   int max_charset_id = 0;
8812   int i;
8813
8814   if (nargs < coding_arg_max)
8815     goto short_args;
8816
8817   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8818
8819   name = args[coding_arg_name];
8820   CHECK_SYMBOL (name);
8821   CODING_ATTR_BASE_NAME (attrs) = name;
8822
8823   val = args[coding_arg_mnemonic];
8824   if (! STRINGP (val))
8825     CHECK_CHARACTER (val);
8826   CODING_ATTR_MNEMONIC (attrs) = val;
8827
8828   coding_type = args[coding_arg_coding_type];
8829   CHECK_SYMBOL (coding_type);
8830   CODING_ATTR_TYPE (attrs) = coding_type;
8831
8832   charset_list = args[coding_arg_charset_list];
8833   if (SYMBOLP (charset_list))
8834     {
8835       if (EQ (charset_list, Qiso_2022))
8836         {
8837           if (! EQ (coding_type, Qiso_2022))
8838             error ("Invalid charset-list");
8839           charset_list = Viso_2022_charset_list;
8840         }
8841       else if (EQ (charset_list, Qemacs_mule))
8842         {
8843           if (! EQ (coding_type, Qemacs_mule))
8844             error ("Invalid charset-list");
8845           charset_list = Vemacs_mule_charset_list;
8846         }
8847       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8848         if (max_charset_id < XFASTINT (XCAR (tail)))
8849           max_charset_id = XFASTINT (XCAR (tail));
8850     }
8851   else
8852     {
8853       charset_list = Fcopy_sequence (charset_list);
8854       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8855         {
8856           struct charset *charset;
8857
8858           val = XCAR (tail);
8859           CHECK_CHARSET_GET_CHARSET (val, charset);
8860           if (EQ (coding_type, Qiso_2022)
8861               ? CHARSET_ISO_FINAL (charset) < 0
8862               : EQ (coding_type, Qemacs_mule)
8863               ? CHARSET_EMACS_MULE_ID (charset) < 0
8864               : 0)
8865             error ("Can't handle charset `%s'",
8866                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8867
8868           XSETCAR (tail, make_number (charset->id));
8869           if (max_charset_id < charset->id)
8870             max_charset_id = charset->id;
8871         }
8872     }
8873   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8874
8875   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8876                                 make_number (255));
8877   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8878     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8879   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8880
8881   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8882
8883   val = args[coding_arg_decode_translation_table];
8884   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8885     CHECK_SYMBOL (val);
8886   CODING_ATTR_DECODE_TBL (attrs) = val;
8887
8888   val = args[coding_arg_encode_translation_table];
8889   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8890     CHECK_SYMBOL (val);
8891   CODING_ATTR_ENCODE_TBL (attrs) = val;
8892
8893   val = args[coding_arg_post_read_conversion];
8894   CHECK_SYMBOL (val);
8895   CODING_ATTR_POST_READ (attrs) = val;
8896
8897   val = args[coding_arg_pre_write_conversion];
8898   CHECK_SYMBOL (val);
8899   CODING_ATTR_PRE_WRITE (attrs) = val;
8900
8901   val = args[coding_arg_default_char];
8902   if (NILP (val))
8903     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8904   else
8905     {
8906       CHECK_CHARACTER (val);
8907       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8908     }
8909
8910   val = args[coding_arg_for_unibyte];
8911   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8912
8913   val = args[coding_arg_plist];
8914   CHECK_LIST (val);
8915   CODING_ATTR_PLIST (attrs) = val;
8916
8917   if (EQ (coding_type, Qcharset))
8918     {
8919       /* Generate a lisp vector of 256 elements.  Each element is nil,
8920          integer, or a list of charset IDs.
8921
8922          If Nth element is nil, the byte code N is invalid in this
8923          coding system.
8924
8925          If Nth element is a number NUM, N is the first byte of a
8926          charset whose ID is NUM.
8927
8928          If Nth element is a list of charset IDs, N is the first byte
8929          of one of them.  The list is sorted by dimensions of the
8930          charsets.  A charset of smaller dimension comes firtst. */
8931       val = Fmake_vector (make_number (256), Qnil);
8932
8933       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8934         {
8935           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8936           int dim = CHARSET_DIMENSION (charset);
8937           int idx = (dim - 1) * 4;
8938
8939           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8940             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8941
8942           for (i = charset->code_space[idx];
8943                i <= charset->code_space[idx + 1]; i++)
8944             {
8945               Lisp_Object tmp, tmp2;
8946               int dim2;
8947
8948               tmp = AREF (val, i);
8949               if (NILP (tmp))
8950                 tmp = XCAR (tail);
8951               else if (NUMBERP (tmp))
8952                 {
8953                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8954                   if (dim < dim2)
8955                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8956                   else
8957                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8958                 }
8959               else
8960                 {
8961                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8962                     {
8963                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8964                       if (dim < dim2)
8965                         break;
8966                     }
8967                   if (NILP (tmp2))
8968                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8969                   else
8970                     {
8971                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8972                       XSETCAR (tmp2, XCAR (tail));
8973                     }
8974                 }
8975               ASET (val, i, tmp);
8976             }
8977         }
8978       ASET (attrs, coding_attr_charset_valids, val);
8979       category = coding_category_charset;
8980     }
8981   else if (EQ (coding_type, Qccl))
8982     {
8983       Lisp_Object valids;
8984
8985       if (nargs < coding_arg_ccl_max)
8986         goto short_args;
8987
8988       val = args[coding_arg_ccl_decoder];
8989       CHECK_CCL_PROGRAM (val);
8990       if (VECTORP (val))
8991         val = Fcopy_sequence (val);
8992       ASET (attrs, coding_attr_ccl_decoder, val);
8993
8994       val = args[coding_arg_ccl_encoder];
8995       CHECK_CCL_PROGRAM (val);
8996       if (VECTORP (val))
8997         val = Fcopy_sequence (val);
8998       ASET (attrs, coding_attr_ccl_encoder, val);
8999
9000       val = args[coding_arg_ccl_valids];
9001       valids = Fmake_string (make_number (256), make_number (0));
9002       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9003         {
9004           int from, to;
9005
9006           val = Fcar (tail);
9007           if (INTEGERP (val))
9008             {
9009               from = to = XINT (val);
9010               if (from < 0 || from > 255)
9011                 args_out_of_range_3 (val, make_number (0), make_number (255));
9012             }
9013           else
9014             {
9015               CHECK_CONS (val);
9016               CHECK_NATNUM_CAR (val);
9017               CHECK_NATNUM_CDR (val);
9018               from = XINT (XCAR (val));
9019               if (from > 255)
9020                 args_out_of_range_3 (XCAR (val),
9021                                      make_number (0), make_number (255));
9022               to = XINT (XCDR (val));
9023               if (to < from || to > 255)
9024                 args_out_of_range_3 (XCDR (val),
9025                                      XCAR (val), make_number (255));
9026             }
9027           for (i = from; i <= to; i++)
9028             SSET (valids, i, 1);
9029         }
9030       ASET (attrs, coding_attr_ccl_valids, valids);
9031
9032       category = coding_category_ccl;
9033     }
9034   else if (EQ (coding_type, Qutf_16))
9035     {
9036       Lisp_Object bom, endian;
9037
9038       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9039
9040       if (nargs < coding_arg_utf16_max)
9041         goto short_args;
9042
9043       bom = args[coding_arg_utf16_bom];
9044       if (! NILP (bom) && ! EQ (bom, Qt))
9045         {
9046           CHECK_CONS (bom);
9047           val = XCAR (bom);
9048           CHECK_CODING_SYSTEM (val);
9049           val = XCDR (bom);
9050           CHECK_CODING_SYSTEM (val);
9051         }
9052       ASET (attrs, coding_attr_utf_16_bom, bom);
9053
9054       endian = args[coding_arg_utf16_endian];
9055       CHECK_SYMBOL (endian);
9056       if (NILP (endian))
9057         endian = Qbig;
9058       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9059         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9060       ASET (attrs, coding_attr_utf_16_endian, endian);
9061
9062       category = (CONSP (bom)
9063                   ? coding_category_utf_16_auto
9064                   : NILP (bom)
9065                   ? (EQ (endian, Qbig)
9066                      ? coding_category_utf_16_be_nosig
9067                      : coding_category_utf_16_le_nosig)
9068                   : (EQ (endian, Qbig)
9069                      ? coding_category_utf_16_be
9070                      : coding_category_utf_16_le));
9071     }
9072   else if (EQ (coding_type, Qiso_2022))
9073     {
9074       Lisp_Object initial, reg_usage, request, flags;
9075       int i;
9076
9077       if (nargs < coding_arg_iso2022_max)
9078         goto short_args;
9079
9080       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9081       CHECK_VECTOR (initial);
9082       for (i = 0; i < 4; i++)
9083         {
9084           val = Faref (initial, make_number (i));
9085           if (! NILP (val))
9086             {
9087               struct charset *charset;
9088
9089               CHECK_CHARSET_GET_CHARSET (val, charset);
9090               ASET (initial, i, make_number (CHARSET_ID (charset)));
9091               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9092                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9093             }
9094           else
9095             ASET (initial, i, make_number (-1));
9096         }
9097
9098       reg_usage = args[coding_arg_iso2022_reg_usage];
9099       CHECK_CONS (reg_usage);
9100       CHECK_NUMBER_CAR (reg_usage);
9101       CHECK_NUMBER_CDR (reg_usage);
9102
9103       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9104       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9105         {
9106           int id;
9107           Lisp_Object tmp;
9108
9109           val = Fcar (tail);
9110           CHECK_CONS (val);
9111           tmp = XCAR (val);
9112           CHECK_CHARSET_GET_ID (tmp, id);
9113           CHECK_NATNUM_CDR (val);
9114           if (XINT (XCDR (val)) >= 4)
9115             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9116           XSETCAR (val, make_number (id));
9117         }
9118
9119       flags = args[coding_arg_iso2022_flags];
9120       CHECK_NATNUM (flags);
9121       i = XINT (flags);
9122       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9123         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9124
9125       ASET (attrs, coding_attr_iso_initial, initial);
9126       ASET (attrs, coding_attr_iso_usage, reg_usage);
9127       ASET (attrs, coding_attr_iso_request, request);
9128       ASET (attrs, coding_attr_iso_flags, flags);
9129       setup_iso_safe_charsets (attrs);
9130
9131       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9132         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9133                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9134                     ? coding_category_iso_7_else
9135                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9136                     ? coding_category_iso_7
9137                     : coding_category_iso_7_tight);
9138       else
9139         {
9140           int id = XINT (AREF (initial, 1));
9141
9142           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9143                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9144                        || id < 0)
9145                       ? coding_category_iso_8_else
9146                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9147                       ? coding_category_iso_8_1
9148                       : coding_category_iso_8_2);
9149         }
9150       if (category != coding_category_iso_8_1
9151           && category != coding_category_iso_8_2)
9152         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9153     }
9154   else if (EQ (coding_type, Qemacs_mule))
9155     {
9156       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9157         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9158       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9159       category = coding_category_emacs_mule;
9160     }
9161   else if (EQ (coding_type, Qshift_jis))
9162     {
9163
9164       struct charset *charset;
9165
9166       if (XINT (Flength (charset_list)) != 3
9167           && XINT (Flength (charset_list)) != 4)
9168         error ("There should be three or four charsets");
9169
9170       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9171       if (CHARSET_DIMENSION (charset) != 1)
9172         error ("Dimension of charset %s is not one",
9173                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9174       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9175         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9176
9177       charset_list = XCDR (charset_list);
9178       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9179       if (CHARSET_DIMENSION (charset) != 1)
9180         error ("Dimension of charset %s is not one",
9181                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9182
9183       charset_list = XCDR (charset_list);
9184       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9185       if (CHARSET_DIMENSION (charset) != 2)
9186         error ("Dimension of charset %s is not two",
9187                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9188
9189       charset_list = XCDR (charset_list);
9190       if (! NILP (charset_list))
9191         {
9192           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9193           if (CHARSET_DIMENSION (charset) != 2)
9194             error ("Dimension of charset %s is not two",
9195                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9196         }
9197
9198       category = coding_category_sjis;
9199       Vsjis_coding_system = name;
9200     }
9201   else if (EQ (coding_type, Qbig5))
9202     {
9203       struct charset *charset;
9204
9205       if (XINT (Flength (charset_list)) != 2)
9206         error ("There should be just two charsets");
9207
9208       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9209       if (CHARSET_DIMENSION (charset) != 1)
9210         error ("Dimension of charset %s is not one",
9211                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9212       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9213         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9214
9215       charset_list = XCDR (charset_list);
9216       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9217       if (CHARSET_DIMENSION (charset) != 2)
9218         error ("Dimension of charset %s is not two",
9219                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9220
9221       category = coding_category_big5;
9222       Vbig5_coding_system = name;
9223     }
9224   else if (EQ (coding_type, Qraw_text))
9225     {
9226       category = coding_category_raw_text;
9227       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9228     }
9229   else if (EQ (coding_type, Qutf_8))
9230     {
9231       category = coding_category_utf_8;
9232       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9233     }
9234   else if (EQ (coding_type, Qundecided))
9235     category = coding_category_undecided;
9236   else
9237     error ("Invalid coding system type: %s",
9238            SDATA (SYMBOL_NAME (coding_type)));
9239
9240   CODING_ATTR_CATEGORY (attrs) = make_number (category);
9241   CODING_ATTR_PLIST (attrs)
9242     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9243                                 CODING_ATTR_PLIST (attrs)));
9244   CODING_ATTR_PLIST (attrs)
9245     = Fcons (QCascii_compatible_p,
9246              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9247                     CODING_ATTR_PLIST (attrs)));
9248
9249   eol_type = args[coding_arg_eol_type];
9250   if (! NILP (eol_type)
9251       && ! EQ (eol_type, Qunix)
9252       && ! EQ (eol_type, Qdos)
9253       && ! EQ (eol_type, Qmac))
9254     error ("Invalid eol-type");
9255
9256   aliases = Fcons (name, Qnil);
9257
9258   if (NILP (eol_type))
9259     {
9260       eol_type = make_subsidiaries (name);
9261       for (i = 0; i < 3; i++)
9262         {
9263           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9264
9265           this_name = AREF (eol_type, i);
9266           this_aliases = Fcons (this_name, Qnil);
9267           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9268           this_spec = Fmake_vector (make_number (3), attrs);
9269           ASET (this_spec, 1, this_aliases);
9270           ASET (this_spec, 2, this_eol_type);
9271           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9272           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
9273           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9274           if (NILP (val))
9275             Vcoding_system_alist
9276               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9277                        Vcoding_system_alist);
9278         }
9279     }
9280
9281   spec_vec = Fmake_vector (make_number (3), attrs);
9282   ASET (spec_vec, 1, aliases);
9283   ASET (spec_vec, 2, eol_type);
9284
9285   Fputhash (name, spec_vec, Vcoding_system_hash_table);
9286   Vcoding_system_list = Fcons (name, Vcoding_system_list);
9287   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9288   if (NILP (val))
9289     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9290                                   Vcoding_system_alist);
9291
9292   {
9293     int id = coding_categories[category].id;
9294
9295     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9296       setup_coding_system (name, &coding_categories[category]);
9297   }
9298
9299   return Qnil;
9300
9301  short_args:
9302   return Fsignal (Qwrong_number_of_arguments,
9303                   Fcons (intern ("define-coding-system-internal"),
9304                          make_number (nargs)));
9305 }
9306
9307
9308 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9309        3, 3, 0,
9310        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
9311   (coding_system, prop, val)
9312      Lisp_Object coding_system, prop, val;
9313 {
9314   Lisp_Object spec, attrs;
9315
9316   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9317   attrs = AREF (spec, 0);
9318   if (EQ (prop, QCmnemonic))
9319     {
9320       if (! STRINGP (val))
9321         CHECK_CHARACTER (val);
9322       CODING_ATTR_MNEMONIC (attrs) = val;
9323     }
9324   else if (EQ (prop, QCdefalut_char))
9325     {
9326       if (NILP (val))
9327         val = make_number (' ');
9328       else
9329         CHECK_CHARACTER (val);
9330       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9331     }
9332   else if (EQ (prop, QCdecode_translation_table))
9333     {
9334       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9335         CHECK_SYMBOL (val);
9336       CODING_ATTR_DECODE_TBL (attrs) = val;
9337     }
9338   else if (EQ (prop, QCencode_translation_table))
9339     {
9340       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9341         CHECK_SYMBOL (val);
9342       CODING_ATTR_ENCODE_TBL (attrs) = val;
9343     }
9344   else if (EQ (prop, QCpost_read_conversion))
9345     {
9346       CHECK_SYMBOL (val);
9347       CODING_ATTR_POST_READ (attrs) = val;
9348     }
9349   else if (EQ (prop, QCpre_write_conversion))
9350     {
9351       CHECK_SYMBOL (val);
9352       CODING_ATTR_PRE_WRITE (attrs) = val;
9353     }
9354   else if (EQ (prop, QCascii_compatible_p))
9355     {
9356       CODING_ATTR_ASCII_COMPAT (attrs) = val;
9357     }
9358
9359   CODING_ATTR_PLIST (attrs)
9360     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9361   return val;
9362 }
9363
9364
9365 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9366        Sdefine_coding_system_alias, 2, 2, 0,
9367        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
9368      (alias, coding_system)
9369      Lisp_Object alias, coding_system;
9370 {
9371   Lisp_Object spec, aliases, eol_type, val;
9372
9373   CHECK_SYMBOL (alias);
9374   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9375   aliases = AREF (spec, 1);
9376   /* ALISES should be a list of length more than zero, and the first
9377      element is a base coding system.  Append ALIAS at the tail of the
9378      list.  */
9379   while (!NILP (XCDR (aliases)))
9380     aliases = XCDR (aliases);
9381   XSETCDR (aliases, Fcons (alias, Qnil));
9382
9383   eol_type = AREF (spec, 2);
9384   if (VECTORP (eol_type))
9385     {
9386       Lisp_Object subsidiaries;
9387       int i;
9388
9389       subsidiaries = make_subsidiaries (alias);
9390       for (i = 0; i < 3; i++)
9391         Fdefine_coding_system_alias (AREF (subsidiaries, i),
9392                                      AREF (eol_type, i));
9393     }
9394
9395   Fputhash (alias, spec, Vcoding_system_hash_table);
9396   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
9397   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9398   if (NILP (val))
9399     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9400                                   Vcoding_system_alist);
9401
9402   return Qnil;
9403 }
9404
9405 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9406        1, 1, 0,
9407        doc: /* Return the base of CODING-SYSTEM.
9408 Any alias or subsidiary coding system is not a base coding system.  */)
9409   (coding_system)
9410      Lisp_Object coding_system;
9411 {
9412   Lisp_Object spec, attrs;
9413
9414   if (NILP (coding_system))
9415     return (Qno_conversion);
9416   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9417   attrs = AREF (spec, 0);
9418   return CODING_ATTR_BASE_NAME (attrs);
9419 }
9420
9421 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9422        1, 1, 0,
9423        doc: "Return the property list of CODING-SYSTEM.")
9424      (coding_system)
9425      Lisp_Object coding_system;
9426 {
9427   Lisp_Object spec, attrs;
9428
9429   if (NILP (coding_system))
9430     coding_system = Qno_conversion;
9431   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9432   attrs = AREF (spec, 0);
9433   return CODING_ATTR_PLIST (attrs);
9434 }
9435
9436
9437 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9438        1, 1, 0,
9439        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
9440      (coding_system)
9441      Lisp_Object coding_system;
9442 {
9443   Lisp_Object spec;
9444
9445   if (NILP (coding_system))
9446     coding_system = Qno_conversion;
9447   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9448   return AREF (spec, 1);
9449 }
9450
9451 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9452        Scoding_system_eol_type, 1, 1, 0,
9453        doc: /* Return eol-type of CODING-SYSTEM.
9454 An eol-type is integer 0, 1, 2, or a vector of coding systems.
9455
9456 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9457 and CR respectively.
9458
9459 A vector value indicates that a format of end-of-line should be
9460 detected automatically.  Nth element of the vector is the subsidiary
9461 coding system whose eol-type is N.  */)
9462      (coding_system)
9463      Lisp_Object coding_system;
9464 {
9465   Lisp_Object spec, eol_type;
9466   int n;
9467
9468   if (NILP (coding_system))
9469     coding_system = Qno_conversion;
9470   if (! CODING_SYSTEM_P (coding_system))
9471     return Qnil;
9472   spec = CODING_SYSTEM_SPEC (coding_system);
9473   eol_type = AREF (spec, 2);
9474   if (VECTORP (eol_type))
9475     return Fcopy_sequence (eol_type);
9476   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9477   return make_number (n);
9478 }
9479
9480 #endif /* emacs */
9481
9482 \f
9483 /*** 9. Post-amble ***/
9484
9485 void
9486 init_coding_once ()
9487 {
9488   int i;
9489
9490   for (i = 0; i < coding_category_max; i++)
9491     {
9492       coding_categories[i].id = -1;
9493       coding_priorities[i] = i;
9494     }
9495
9496   /* ISO2022 specific initialize routine.  */
9497   for (i = 0; i < 0x20; i++)
9498     iso_code_class[i] = ISO_control_0;
9499   for (i = 0x21; i < 0x7F; i++)
9500     iso_code_class[i] = ISO_graphic_plane_0;
9501   for (i = 0x80; i < 0xA0; i++)
9502     iso_code_class[i] = ISO_control_1;
9503   for (i = 0xA1; i < 0xFF; i++)
9504     iso_code_class[i] = ISO_graphic_plane_1;
9505   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9506   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9507   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9508   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9509   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9510   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9511   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9512   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9513   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9514
9515   for (i = 0; i < 256; i++)
9516     {
9517       emacs_mule_bytes[i] = 1;
9518     }
9519   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9520   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9521   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9522   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9523 }
9524
9525 #ifdef emacs
9526
9527 void
9528 syms_of_coding ()
9529 {
9530   staticpro (&Vcoding_system_hash_table);
9531   {
9532     Lisp_Object args[2];
9533     args[0] = QCtest;
9534     args[1] = Qeq;
9535     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9536   }
9537
9538   staticpro (&Vsjis_coding_system);
9539   Vsjis_coding_system = Qnil;
9540
9541   staticpro (&Vbig5_coding_system);
9542   Vbig5_coding_system = Qnil;
9543
9544   staticpro (&Vcode_conversion_reused_workbuf);
9545   Vcode_conversion_reused_workbuf = Qnil;
9546
9547   staticpro (&Vcode_conversion_workbuf_name);
9548   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9549
9550   reused_workbuf_in_use = 0;
9551
9552   DEFSYM (Qcharset, "charset");
9553   DEFSYM (Qtarget_idx, "target-idx");
9554   DEFSYM (Qcoding_system_history, "coding-system-history");
9555   Fset (Qcoding_system_history, Qnil);
9556
9557   /* Target FILENAME is the first argument.  */
9558   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9559   /* Target FILENAME is the third argument.  */
9560   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9561
9562   DEFSYM (Qcall_process, "call-process");
9563   /* Target PROGRAM is the first argument.  */
9564   Fput (Qcall_process, Qtarget_idx, make_number (0));
9565
9566   DEFSYM (Qcall_process_region, "call-process-region");
9567   /* Target PROGRAM is the third argument.  */
9568   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9569
9570   DEFSYM (Qstart_process, "start-process");
9571   /* Target PROGRAM is the third argument.  */
9572   Fput (Qstart_process, Qtarget_idx, make_number (2));
9573
9574   DEFSYM (Qopen_network_stream, "open-network-stream");
9575   /* Target SERVICE is the fourth argument.  */
9576   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9577
9578   DEFSYM (Qcoding_system, "coding-system");
9579   DEFSYM (Qcoding_aliases, "coding-aliases");
9580
9581   DEFSYM (Qeol_type, "eol-type");
9582   DEFSYM (Qunix, "unix");
9583   DEFSYM (Qdos, "dos");
9584
9585   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9586   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9587   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9588   DEFSYM (Qdefault_char, "default-char");
9589   DEFSYM (Qundecided, "undecided");
9590   DEFSYM (Qno_conversion, "no-conversion");
9591   DEFSYM (Qraw_text, "raw-text");
9592
9593   DEFSYM (Qiso_2022, "iso-2022");
9594
9595   DEFSYM (Qutf_8, "utf-8");
9596   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9597
9598   DEFSYM (Qutf_16, "utf-16");
9599   DEFSYM (Qbig, "big");
9600   DEFSYM (Qlittle, "little");
9601
9602   DEFSYM (Qshift_jis, "shift-jis");
9603   DEFSYM (Qbig5, "big5");
9604
9605   DEFSYM (Qcoding_system_p, "coding-system-p");
9606
9607   DEFSYM (Qcoding_system_error, "coding-system-error");
9608   Fput (Qcoding_system_error, Qerror_conditions,
9609         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9610   Fput (Qcoding_system_error, Qerror_message,
9611         build_string ("Invalid coding system"));
9612
9613   /* Intern this now in case it isn't already done.
9614      Setting this variable twice is harmless.
9615      But don't staticpro it here--that is done in alloc.c.  */
9616   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9617
9618   DEFSYM (Qtranslation_table, "translation-table");
9619   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9620   DEFSYM (Qtranslation_table_id, "translation-table-id");
9621   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9622   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9623
9624   DEFSYM (Qvalid_codes, "valid-codes");
9625
9626   DEFSYM (Qemacs_mule, "emacs-mule");
9627
9628   DEFSYM (QCcategory, ":category");
9629   DEFSYM (QCmnemonic, ":mnemonic");
9630   DEFSYM (QCdefalut_char, ":default-char");
9631   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9632   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9633   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9634   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9635   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
9636
9637   Vcoding_category_table
9638     = Fmake_vector (make_number (coding_category_max), Qnil);
9639   staticpro (&Vcoding_category_table);
9640   /* Followings are target of code detection.  */
9641   ASET (Vcoding_category_table, coding_category_iso_7,
9642         intern ("coding-category-iso-7"));
9643   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9644         intern ("coding-category-iso-7-tight"));
9645   ASET (Vcoding_category_table, coding_category_iso_8_1,
9646         intern ("coding-category-iso-8-1"));
9647   ASET (Vcoding_category_table, coding_category_iso_8_2,
9648         intern ("coding-category-iso-8-2"));
9649   ASET (Vcoding_category_table, coding_category_iso_7_else,
9650         intern ("coding-category-iso-7-else"));
9651   ASET (Vcoding_category_table, coding_category_iso_8_else,
9652         intern ("coding-category-iso-8-else"));
9653   ASET (Vcoding_category_table, coding_category_utf_8,
9654         intern ("coding-category-utf-8"));
9655   ASET (Vcoding_category_table, coding_category_utf_16_be,
9656         intern ("coding-category-utf-16-be"));
9657   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9658         intern ("coding-category-utf-16-auto"));
9659   ASET (Vcoding_category_table, coding_category_utf_16_le,
9660         intern ("coding-category-utf-16-le"));
9661   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9662         intern ("coding-category-utf-16-be-nosig"));
9663   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9664         intern ("coding-category-utf-16-le-nosig"));
9665   ASET (Vcoding_category_table, coding_category_charset,
9666         intern ("coding-category-charset"));
9667   ASET (Vcoding_category_table, coding_category_sjis,
9668         intern ("coding-category-sjis"));
9669   ASET (Vcoding_category_table, coding_category_big5,
9670         intern ("coding-category-big5"));
9671   ASET (Vcoding_category_table, coding_category_ccl,
9672         intern ("coding-category-ccl"));
9673   ASET (Vcoding_category_table, coding_category_emacs_mule,
9674         intern ("coding-category-emacs-mule"));
9675   /* Followings are NOT target of code detection.  */
9676   ASET (Vcoding_category_table, coding_category_raw_text,
9677         intern ("coding-category-raw-text"));
9678   ASET (Vcoding_category_table, coding_category_undecided,
9679         intern ("coding-category-undecided"));
9680
9681   DEFSYM (Qinsufficient_source, "insufficient-source");
9682   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9683   DEFSYM (Qinvalid_source, "invalid-source");
9684   DEFSYM (Qinterrupted, "interrupted");
9685   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9686   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
9687
9688   defsubr (&Scoding_system_p);
9689   defsubr (&Sread_coding_system);
9690   defsubr (&Sread_non_nil_coding_system);
9691   defsubr (&Scheck_coding_system);
9692   defsubr (&Sdetect_coding_region);
9693   defsubr (&Sdetect_coding_string);
9694   defsubr (&Sfind_coding_systems_region_internal);
9695   defsubr (&Sunencodable_char_position);
9696   defsubr (&Scheck_coding_systems_region);
9697   defsubr (&Sdecode_coding_region);
9698   defsubr (&Sencode_coding_region);
9699   defsubr (&Sdecode_coding_string);
9700   defsubr (&Sencode_coding_string);
9701   defsubr (&Sdecode_sjis_char);
9702   defsubr (&Sencode_sjis_char);
9703   defsubr (&Sdecode_big5_char);
9704   defsubr (&Sencode_big5_char);
9705   defsubr (&Sset_terminal_coding_system_internal);
9706   defsubr (&Sset_safe_terminal_coding_system_internal);
9707   defsubr (&Sterminal_coding_system);
9708   defsubr (&Sset_keyboard_coding_system_internal);
9709   defsubr (&Skeyboard_coding_system);
9710   defsubr (&Sfind_operation_coding_system);
9711   defsubr (&Sset_coding_system_priority);
9712   defsubr (&Sdefine_coding_system_internal);
9713   defsubr (&Sdefine_coding_system_alias);
9714   defsubr (&Scoding_system_put);
9715   defsubr (&Scoding_system_base);
9716   defsubr (&Scoding_system_plist);
9717   defsubr (&Scoding_system_aliases);
9718   defsubr (&Scoding_system_eol_type);
9719   defsubr (&Scoding_system_priority_list);
9720
9721   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
9722                doc: /* List of coding systems.
9723
9724 Do not alter the value of this variable manually.  This variable should be
9725 updated by the functions `define-coding-system' and
9726 `define-coding-system-alias'.  */);
9727   Vcoding_system_list = Qnil;
9728
9729   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
9730                doc: /* Alist of coding system names.
9731 Each element is one element list of coding system name.
9732 This variable is given to `completing-read' as TABLE argument.
9733
9734 Do not alter the value of this variable manually.  This variable should be
9735 updated by the functions `make-coding-system' and
9736 `define-coding-system-alias'.  */);
9737   Vcoding_system_alist = Qnil;
9738
9739   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
9740                doc: /* List of coding-categories (symbols) ordered by priority.
9741
9742 On detecting a coding system, Emacs tries code detection algorithms
9743 associated with each coding-category one by one in this order.  When
9744 one algorithm agrees with a byte sequence of source text, the coding
9745 system bound to the corresponding coding-category is selected.
9746
9747 Don't modify this variable directly, but use `set-coding-priority'.  */);
9748   {
9749     int i;
9750
9751     Vcoding_category_list = Qnil;
9752     for (i = coding_category_max - 1; i >= 0; i--)
9753       Vcoding_category_list
9754         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9755                  Vcoding_category_list);
9756   }
9757
9758   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
9759                doc: /* Specify the coding system for read operations.
9760 It is useful to bind this variable with `let', but do not set it globally.
9761 If the value is a coding system, it is used for decoding on read operation.
9762 If not, an appropriate element is used from one of the coding system alists:
9763 There are three such tables, `file-coding-system-alist',
9764 `process-coding-system-alist', and `network-coding-system-alist'.  */);
9765   Vcoding_system_for_read = Qnil;
9766
9767   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
9768                doc: /* Specify the coding system for write operations.
9769 Programs bind this variable with `let', but you should not set it globally.
9770 If the value is a coding system, it is used for encoding of output,
9771 when writing it to a file and when sending it to a file or subprocess.
9772
9773 If this does not specify a coding system, an appropriate element
9774 is used from one of the coding system alists:
9775 There are three such tables, `file-coding-system-alist',
9776 `process-coding-system-alist', and `network-coding-system-alist'.
9777 For output to files, if the above procedure does not specify a coding system,
9778 the value of `buffer-file-coding-system' is used.  */);
9779   Vcoding_system_for_write = Qnil;
9780
9781   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
9782                doc: /*
9783 Coding system used in the latest file or process I/O.  */);
9784   Vlast_coding_system_used = Qnil;
9785
9786   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9787                doc: /*
9788 Error status of the last code conversion.
9789
9790 When an error was detected in the last code conversion, this variable
9791 is set to one of the following symbols.
9792   `insufficient-source'
9793   `inconsistent-eol'
9794   `invalid-source'
9795   `interrupted'
9796   `insufficient-memory'
9797 When no error was detected, the value doesn't change.  So, to check
9798 the error status of a code conversion by this variable, you must
9799 explicitly set this variable to nil before performing code
9800 conversion.  */);
9801   Vlast_code_conversion_error = Qnil;
9802
9803   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
9804                doc: /*
9805 *Non-nil means always inhibit code conversion of end-of-line format.
9806 See info node `Coding Systems' and info node `Text and Binary' concerning
9807 such conversion.  */);
9808   inhibit_eol_conversion = 0;
9809
9810   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
9811                doc: /*
9812 Non-nil means process buffer inherits coding system of process output.
9813 Bind it to t if the process output is to be treated as if it were a file
9814 read from some filesystem.  */);
9815   inherit_process_coding_system = 0;
9816
9817   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
9818                doc: /*
9819 Alist to decide a coding system to use for a file I/O operation.
9820 The format is ((PATTERN . VAL) ...),
9821 where PATTERN is a regular expression matching a file name,
9822 VAL is a coding system, a cons of coding systems, or a function symbol.
9823 If VAL is a coding system, it is used for both decoding and encoding
9824 the file contents.
9825 If VAL is a cons of coding systems, the car part is used for decoding,
9826 and the cdr part is used for encoding.
9827 If VAL is a function symbol, the function must return a coding system
9828 or a cons of coding systems which are used as above.  The function is
9829 called with an argument that is a list of the arguments with which
9830 `find-operation-coding-system' was called.  If the function can't decide
9831 a coding system, it can return `undecided' so that the normal
9832 code-detection is performed.
9833
9834 See also the function `find-operation-coding-system'
9835 and the variable `auto-coding-alist'.  */);
9836   Vfile_coding_system_alist = Qnil;
9837
9838   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
9839                doc: /*
9840 Alist to decide a coding system to use for a process I/O operation.
9841 The format is ((PATTERN . VAL) ...),
9842 where PATTERN is a regular expression matching a program name,
9843 VAL is a coding system, a cons of coding systems, or a function symbol.
9844 If VAL is a coding system, it is used for both decoding what received
9845 from the program and encoding what sent to the program.
9846 If VAL is a cons of coding systems, the car part is used for decoding,
9847 and the cdr part is used for encoding.
9848 If VAL is a function symbol, the function must return a coding system
9849 or a cons of coding systems which are used as above.
9850
9851 See also the function `find-operation-coding-system'.  */);
9852   Vprocess_coding_system_alist = Qnil;
9853
9854   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
9855                doc: /*
9856 Alist to decide a coding system to use for a network I/O operation.
9857 The format is ((PATTERN . VAL) ...),
9858 where PATTERN is a regular expression matching a network service name
9859 or is a port number to connect to,
9860 VAL is a coding system, a cons of coding systems, or a function symbol.
9861 If VAL is a coding system, it is used for both decoding what received
9862 from the network stream and encoding what sent to the network stream.
9863 If VAL is a cons of coding systems, the car part is used for decoding,
9864 and the cdr part is used for encoding.
9865 If VAL is a function symbol, the function must return a coding system
9866 or a cons of coding systems which are used as above.
9867
9868 See also the function `find-operation-coding-system'.  */);
9869   Vnetwork_coding_system_alist = Qnil;
9870
9871   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
9872                doc: /* Coding system to use with system messages.
9873 Also used for decoding keyboard input on X Window system.  */);
9874   Vlocale_coding_system = Qnil;
9875
9876   /* The eol mnemonics are reset in startup.el system-dependently.  */
9877   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
9878                doc: /*
9879 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
9880   eol_mnemonic_unix = build_string (":");
9881
9882   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
9883                doc: /*
9884 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
9885   eol_mnemonic_dos = build_string ("\\");
9886
9887   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
9888                doc: /*
9889 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
9890   eol_mnemonic_mac = build_string ("/");
9891
9892   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
9893                doc: /*
9894 *String displayed in mode line when end-of-line format is not yet determined.  */);
9895   eol_mnemonic_undecided = build_string (":");
9896
9897   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
9898                doc: /*
9899 *Non-nil enables character translation while encoding and decoding.  */);
9900   Venable_character_translation = Qt;
9901
9902   DEFVAR_LISP ("standard-translation-table-for-decode",
9903                &Vstandard_translation_table_for_decode,
9904                doc: /* Table for translating characters while decoding.  */);
9905   Vstandard_translation_table_for_decode = Qnil;
9906
9907   DEFVAR_LISP ("standard-translation-table-for-encode",
9908                &Vstandard_translation_table_for_encode,
9909                doc: /* Table for translating characters while encoding.  */);
9910   Vstandard_translation_table_for_encode = Qnil;
9911
9912   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9913                doc: /* Alist of charsets vs revision numbers.
9914 While encoding, if a charset (car part of an element) is found,
9915 designate it with the escape sequence identifying revision (cdr part
9916 of the element).  */);
9917   Vcharset_revision_table = Qnil;
9918
9919   DEFVAR_LISP ("default-process-coding-system",
9920                &Vdefault_process_coding_system,
9921                doc: /* Cons of coding systems used for process I/O by default.
9922 The car part is used for decoding a process output,
9923 the cdr part is used for encoding a text to be sent to a process.  */);
9924   Vdefault_process_coding_system = Qnil;
9925
9926   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9927                doc: /*
9928 Table of extra Latin codes in the range 128..159 (inclusive).
9929 This is a vector of length 256.
9930 If Nth element is non-nil, the existence of code N in a file
9931 \(or output of subprocess) doesn't prevent it to be detected as
9932 a coding system of ISO 2022 variant which has a flag
9933 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9934 or reading output of a subprocess.
9935 Only 128th through 159th elements has a meaning.  */);
9936   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9937
9938   DEFVAR_LISP ("select-safe-coding-system-function",
9939                &Vselect_safe_coding_system_function,
9940                doc: /*
9941 Function to call to select safe coding system for encoding a text.
9942
9943 If set, this function is called to force a user to select a proper
9944 coding system which can encode the text in the case that a default
9945 coding system used in each operation can't encode the text.  The
9946 function should take care that the buffer is not modified while
9947 the coding system is being selected.
9948
9949 The default value is `select-safe-coding-system' (which see).  */);
9950   Vselect_safe_coding_system_function = Qnil;
9951
9952   DEFVAR_BOOL ("coding-system-require-warning",
9953                &coding_system_require_warning,
9954                doc: /* Internal use only.
9955 If non-nil, on writing a file, `select-safe-coding-system-function' is
9956 called even if `coding-system-for-write' is non-nil.  The command
9957 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9958   coding_system_require_warning = 0;
9959
9960
9961   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9962                &inhibit_iso_escape_detection,
9963                doc: /*
9964 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9965
9966 By default, on reading a file, Emacs tries to detect how the text is
9967 encoded.  This code detection is sensitive to escape sequences.  If
9968 the sequence is valid as ISO2022, the code is determined as one of
9969 the ISO2022 encodings, and the file is decoded by the corresponding
9970 coding system (e.g. `iso-2022-7bit').
9971
9972 However, there may be a case that you want to read escape sequences in
9973 a file as is.  In such a case, you can set this variable to non-nil.
9974 Then, as the code detection ignores any escape sequences, no file is
9975 detected as encoded in some ISO2022 encoding.  The result is that all
9976 escape sequences become visible in a buffer.
9977
9978 The default value is nil, and it is strongly recommended not to change
9979 it.  That is because many Emacs Lisp source files that contain
9980 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9981 in Emacs's distribution, and they won't be decoded correctly on
9982 reading if you suppress escape sequence detection.
9983
9984 The other way to read escape sequences in a file without decoding is
9985 to explicitly specify some coding system that doesn't use ISO2022's
9986 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9987   inhibit_iso_escape_detection = 0;
9988
9989   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9990                doc: /* Char table for translating self-inserting characters.
9991 This is applied to the result of input methods, not their input.  See also
9992 `keyboard-translate-table'.  */);
9993     Vtranslation_table_for_input = Qnil;
9994
9995   {
9996     Lisp_Object args[coding_arg_max];
9997     Lisp_Object plist[16];
9998     int i;
9999
10000     for (i = 0; i < coding_arg_max; i++)
10001       args[i] = Qnil;
10002
10003     plist[0] = intern (":name");
10004     plist[1] = args[coding_arg_name] = Qno_conversion;
10005     plist[2] = intern (":mnemonic");
10006     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10007     plist[4] = intern (":coding-type");
10008     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10009     plist[6] = intern (":ascii-compatible-p");
10010     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10011     plist[8] = intern (":default-char");
10012     plist[9] = args[coding_arg_default_char] = make_number (0);
10013     plist[10] = intern (":for-unibyte");
10014     plist[11] = args[coding_arg_for_unibyte] = Qt;
10015     plist[12] = intern (":docstring");
10016     plist[13] = build_string ("Do no conversion.\n\
10017 \n\
10018 When you visit a file with this coding, the file is read into a\n\
10019 unibyte buffer as is, thus each byte of a file is treated as a\n\
10020 character.");
10021     plist[14] = intern (":eol-type");
10022     plist[15] = args[coding_arg_eol_type] = Qunix;
10023     args[coding_arg_plist] = Flist (16, plist);
10024     Fdefine_coding_system_internal (coding_arg_max, args);
10025
10026     plist[1] = args[coding_arg_name] = Qundecided;
10027     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10028     plist[5] = args[coding_arg_coding_type] = Qundecided;
10029     /* This is already set.
10030        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10031     plist[8] = intern (":charset-list");
10032     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10033     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10034     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10035     plist[15] = args[coding_arg_eol_type] = Qnil;
10036     args[coding_arg_plist] = Flist (16, plist);
10037     Fdefine_coding_system_internal (coding_arg_max, args);
10038   }
10039
10040   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10041
10042   {
10043     int i;
10044
10045     for (i = 0; i < coding_category_max; i++)
10046       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10047   }
10048 #if defined (MSDOS) || defined (WINDOWSNT)
10049   system_eol_type = Qdos;
10050 #else
10051   system_eol_type = Qunix;
10052 #endif
10053   staticpro (&system_eol_type);
10054 }
10055
10056 char *
10057 emacs_strerror (error_number)
10058      int error_number;
10059 {
10060   char *str;
10061
10062   synchronize_system_messages_locale ();
10063   str = strerror (error_number);
10064
10065   if (! NILP (Vlocale_coding_system))
10066     {
10067       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10068                                                       Vlocale_coding_system,
10069                                                       0);
10070       str = (char *) SDATA (dec);
10071     }
10072
10073   return str;
10074 }
10075
10076 #endif /* emacs */
10077
10078 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10079    (do not change this comment) */